diff --git a/.gitignore b/.gitignore index 5298f42..3b93ce0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,5 @@ .idea/ -*.h5 - __pycache__ -*.py[cod] -build/ -dist/ -*.egg-info/ +*.h5 +*.pyc diff --git a/README.md b/README.md index 731450b..5da2b6b 100644 --- a/README.md +++ b/README.md @@ -5,38 +5,32 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs). **Performance** highly depends on what hardware you have available: * A 395x345x73 cell simulation (~10 million points, 8-cell absorbing boundaries) - runs at around 91 iterations/sec. on my AMD RX480. -* On an Nvidia GTX 580, it runs at 66 iterations/sec -* On my laptop (Nvidia 940M) the same simulation achieves ~12 iterations/sec. + runs at around 42 iterations/sec. on my Nvidia GTX 580. +* On my laptop (Nvidia 940M) the same simulation achieves ~8 iterations/sec. * An L3 photonic crystal cavity ringdown simulation (1550nm source, 40nm - discretization, 8000 steps) takes about 3 minutes on my laptop. + discretization, 8000 steps) takes about 5 minutes on my laptop. **Capabilities** are currently pretty minimal: * Absorbing boundaries (CPML) -* Perfect electrical conductors (PECs; to use set epsilon to inf) +* Conducting boundaries (PMC) * Anisotropic media (eps_xx, eps_yy, eps_zz, mu_xx, ...) * Direct access to fields (eg., you can trivially add a soft or hard - current source with just sim.E[ind] += sin(f0 * t), or save any portion + current source with just sim.E[1] += sin(f0 * t), or save any portion of a field to a file) - ## Installation **Requirements:** * python 3 (written and tested with 3.5) * numpy * pyopencl -* jinja2 -* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools) - -Optional (used for examples): -* dill (for file output) -* [gridlock](https://mpxd.net/code/jan/gridlock) -* [masque](https://mpxd.net/code/jan/masque) +* h5py (for file output) +* [gridlock](https://mpxd.net/gogs/jan/gridlock) +* [masque](https://mpxd.net/gogs/jan/masque) To get the code, just clone this repository: ```bash -git clone https://mpxd.net/code/jan/opencl_fdtd.git +git clone https://mpxd.net/gogs/jan/opencl_fdtd.git ``` You can install the requirements and their dependencies easily with diff --git a/fdtd.py b/fdtd.py index 99af07b..0ce560e 100644 --- a/fdtd.py +++ b/fdtd.py @@ -6,23 +6,14 @@ See main() for simulation setup. import sys import time -import logging import numpy -import lzma -import dill +import h5py -from opencl_fdtd import Simulation +from fdtd.simulation import Simulation from masque import Pattern, shapes import gridlock import pcgen -import fdfd_tools - - -__author__ = 'Jan Petykiewicz' - -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger(__name__) def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: @@ -84,7 +75,7 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: def main(): max_t = 8000 # number of timesteps - dx = 25 # discretization (nm/cell) + dx = 40 # discretization (nm/cell) pml_thickness = 8 # (number of cells) wl = 1550 # Excitation wavelength and fwhm @@ -105,8 +96,7 @@ def main(): z_max = 1.6 * a xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx - # Coordinates of the edges of the cells. - # The fdtd package can only do square grids at the moment. + # Coordinates of the edges of the cells. The fdtd package can only do square grids at the moment. half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max] edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords] @@ -124,13 +114,19 @@ def main(): eps=n_air**2, polygons=mask.as_polygons()) - logger.info('grid shape: {}'.format(grid.shape)) + print(grid.shape) # #### Create the simulation grid #### - pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness} - for a in 'xyz' for p in 'np'] - #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x'] - bloch = [] - sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch) + sim = Simulation(grid.grids) + + # Conducting boundaries and pmls in every direction + c_args = [] + pml_args = [] + for d in (0, 1, 2): + for p in (-1, 1): + c_args += [{'direction': d, 'polarity': p}] + pml_args += [{'direction': d, 'polarity': p, 'epsilon_eff': n_slab**2}] + sim.init_conductors(c_args) + sim.init_cpml(pml_args) # Source parameters and function w = 2 * numpy.pi * dx / wl @@ -142,54 +138,30 @@ def main(): t0 = i * sim.dt - delay return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2) - with open('sources.c', 'w') as f: - f.write(sim.sources['E']) - f.write('\n====================H======================\n') - f.write(sim.sources['H']) - if sim.update_S: - f.write('\n=====================S=====================\n') - f.write(sim.sources['S']) - if bloch: - f.write('\n=====================F=====================\n') - f.write(sim.sources['F']) - f.write('\n=====================G=====================\n') - f.write(sim.sources['G']) - # #### Run a bunch of iterations #### - # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued - # immediately and run once prev_event is finished. + # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued immediately and run + # once prev_event is finished. + output_file = h5py.File('simulation_output.h5', 'w') start = time.perf_counter() for t in range(max_t): - e = sim.update_E([]) - if bloch: - e = sim.update_F([e]) - e.wait() + event = sim.cpml_E([]) + sim.update_E([event]).wait() - ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape) - sim.E[ind] += field_source(t) - e = sim.update_H([]) - if bloch: - e = sim.update_G([e]) - if sim.update_S: - e = sim.update_S([e]) - e.wait() + sim.E[1][tuple(grid.shape//2)] += field_source(t) + event = sim.conductor_E([]) + event = sim.cpml_H([event]) + event = sim.update_H([event]) + sim.conductor_H([event]).wait() - if t % 100 == 0: - logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) - sys.stdout.flush() - - with lzma.open('saved_simulation', 'wb') as f: - def unvec(f): - return fdfd_tools.unvec(f, grid.shape) - d = { - 'grid': grid, - 'E': unvec(sim.E.get()), - 'H': unvec(sim.H.get()), - } - if sim.S is not None: - d['S'] = unvec(sim.S.get()) - dill.dump(d, f) + print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) + sys.stdout.flush() + # Save field slices + if (t % 20 == 0 and (max_t - t < 1000 or t < 1000)) or t == max_t-1: + print('saving E-field') + for j, f in enumerate(sim.E): + a = f.get() + output_file['/E{}_t{}'.format('xyz'[j], t)] = a[:, :, round(a.shape[2]/2)] if __name__ == '__main__': main() diff --git a/fdtd/__init__.py b/fdtd/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/fdtd/base.py b/fdtd/base.py new file mode 100644 index 0000000..6285fb9 --- /dev/null +++ b/fdtd/base.py @@ -0,0 +1,72 @@ +""" +Basic code snippets for opencl FDTD +""" + +from typing import List +import numpy + + +def shape_source(shape: List[int] or numpy.ndarray) -> str: + """ + Defines sx, sy, sz C constants specifying the shape of the grid in each of the 3 dimensions. + + :param shape: [sx, sy, sz] values. + :return: String containing C source. + """ + sxyz = """ +// Field sizes +const int sx = {shape[0]}; +const int sy = {shape[1]}; +const int sz = {shape[2]}; +""".format(shape=shape) + return sxyz + +# Defines dix, diy, diz constants used for stepping in the x, y, z directions in a linear array +# (ie, given Ex[i] referring to position (x, y, z), Ex[i+diy] will refer to position (x, y+1, z)) +dixyz_source = """ +// Convert offset in field xyz to linear index offset +const int dix = sz * sy; +const int diy = sz; +const int diz = 1; +""" + +# Given a linear index i and shape sx, sy, sz, defines x, y, and z as the 3D indices of the current element (i). +xyz_source = """ +// Convert linear index to field index (xyz) +const int x = i / (sz * sy); +const int y = (i - x * sz * sy) / sz; +const int z = (i - y * sz - x * sz * sy); +""" + +# Source code for updating the E field; maxes use of dixyz_source. +maxwell_E_source = """ +// E update equations +Ex[i] += dt / epsx[i] * ((Hz[i] - Hz[i-diy]) - (Hy[i] - Hy[i-diz])); +Ey[i] += dt / epsy[i] * ((Hx[i] - Hx[i-diz]) - (Hz[i] - Hz[i-dix])); +Ez[i] += dt / epsz[i] * ((Hy[i] - Hy[i-dix]) - (Hx[i] - Hx[i-diy])); +""" + +# Source code for updating the H field; maxes use of dixyz_source and assumes mu=0 +maxwell_H_source = """ +// H update equations +Hx[i] -= dt * ((Ez[i+diy] - Ez[i]) - (Ey[i+diz] - Ey[i])); +Hy[i] -= dt * ((Ex[i+diz] - Ex[i]) - (Ez[i+dix] - Ez[i])); +Hz[i] -= dt * ((Ey[i+dix] - Ey[i]) - (Ex[i+diy] - Ex[i])); +""" + + +def type_to_C(float_type: numpy.float32 or numpy.float64) -> str: + """ + Returns a string corresponding to the C equivalent of a numpy type. + Only works for float32 and float64. + + :param float_type: numpy.float32 or numpy.float64 + :return: string containing the corresponding C type (eg. 'double') + """ + if float_type == numpy.float32: + arg_type = 'float' + elif float_type == numpy.float64: + arg_type = 'double' + else: + raise Exception('Unsupported type') + return arg_type diff --git a/fdtd/boundary.py b/fdtd/boundary.py new file mode 100644 index 0000000..31dfac8 --- /dev/null +++ b/fdtd/boundary.py @@ -0,0 +1,194 @@ +from typing import List, Dict +import numpy + + +def conductor(direction: int, + polarity: int, + ) -> List[str]: + """ + Create source code for conducting boundary conditions. + + :param direction: integer in range(3), corresponding to x,y,z. + :param polarity: -1 or 1, specifying eg. a -x or +x boundary. + :return: [E_source, H_source] source code for E and H boundary update steps. + """ + if direction not in range(3): + raise Exception('Invalid direction: {}'.format(direction)) + + if polarity not in (-1, 1): + raise Exception('Invalid polarity: {}'.format(polarity)) + + r = 'xyz'[direction] + uv = 'xyz'.replace(r, '') + + if polarity < 0: + bc_e = """ +if ({r} == 0) {{ + E{r}[i] = 0; + E{u}[i] = E{u}[i+di{r}]; + E{v}[i] = E{v}[i+di{r}]; +}} +""" + bc_h = """ +if ({r} == 0) {{ + H{r}[i] = H{r}[i+di{r}]; + H{u}[i] = 0; + H{v}[i] = 0; +}} +""" + + elif polarity > 0: + bc_e = """ +if ({r} == s{r} - 1) {{ + E{r}[i] = -E{r}[i-2*di{r}]; + E{u}[i] = +E{u}[i-di{r}]; + E{v}[i] = +E{v}[i-di{r}]; +}} else if ({r} == s{r} - 2) {{ + E{r}[i] = 0; +}} +""" + bc_h = """ +if ({r} == s{r} - 1) {{ + H{r}[i] = +H{r}[i-di{r}]; + H{u}[i] = -H{u}[i-2*di{r}]; + H{v}[i] = -H{v}[i-2*di{r}]; +}} else if ({r} == s{r} - 2) {{ + H{u}[i] = 0; + H{v}[i] = 0; +}} +""" + else: + raise Exception() + + replacements = {'r': r, 'u': uv[0], 'v': uv[1]} + return [s.format(**replacements) for s in (bc_e, bc_h)] + + +def cpml(direction: int, + polarity: int, + dt: float, + thickness: int=8, + epsilon_eff: float=1, + ) -> Dict: + """ + Generate source code for complex phase matched layer (cpml) absorbing boundaries. + These are not full boundary conditions and require a conducting boundary to be added + in the same direction as the pml. + + :param direction: integer in range(3), corresponding to x, y, z directions. + :param polarity: -1 or 1, corresponding to eg. -x or +x direction. + :param dt: timestep used by the simulation + :param thickness: Number of cells used by the pml (the grid is NOT expanded to add these cells). Default 8. + :param epsilon_eff: Effective epsilon_r of the pml layer. Default 1. + :return: Dict with entries 'E', 'H' (update equations for E and H) and 'psi_E', 'psi_H' (lists of str, + specifying the field names of the cpml fields used in the E and H update steps. Eg., + Psi_xn_Ex for the complex Ex component for the x- pml.) + """ + if direction not in range(3): + raise Exception('Invalid direction: {}'.format(direction)) + + if polarity not in (-1, 1): + raise Exception('Invalid polarity: {}'.format(polarity)) + + if thickness <= 2: + raise Exception('It would be wise to have a pml with 4+ cells of thickness') + + if epsilon_eff <= 0: + raise Exception('epsilon_eff must be positive') + + m = (3.5, 1) + sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(epsilon_eff) + alpha_max = 0 # TODO: Decide what to do about non-zero alpha + transverse = numpy.delete(range(3), direction) + + r = 'xyz'[direction] + np = 'nVp'[numpy.sign(polarity)+1] + uv = ['xyz'[i] for i in transverse] + + xe = numpy.arange(1, thickness+1, dtype=float)[::-1] + xh = numpy.arange(1, thickness+1, dtype=float)[::-1] + if polarity > 0: + xe -= 0.5 + elif polarity < 0: + xh -= 0.5 + + def par(x): + sigma = ((x / thickness) ** m[0]) * sigma_max + alpha = ((1 - x / thickness) ** m[1]) * alpha_max + p0 = numpy.exp(-(sigma + alpha) * dt) + p1 = sigma / (sigma + alpha) * (p0 - 1) + return p0, p1 + p0e, p1e = par(xe) + p0h, p1h = par(xh) + + def create_table(name, xs, type='float'): + s = type + ''' {name}(int x) {{ + switch (x) {{ +'''.format(name=name) + for i, x in enumerate(xs): + s += ''' case {i}: return {x}; +'''.format(i=i, x=x) + s += ''' + } +}''' + return s + + vals = {'r': r, + 'u': uv[0], + 'v': uv[1], + 'np': np, + 'th': thickness, + #'p0e': ', '.join((str(x) for x in p0e)), + #'p1e': ', '.join((str(x) for x in p1e)), + #'p0h': ', '.join((str(x) for x in p0h)), + #'p1h': ', '.join((str(x) for x in p1h)), + 'se': '-+'[direction % 2], + 'sh': '+-'[direction % 2]} + + if polarity < 0: + bounds_if = """ +if ( 0 < {r} && {r} < {th} + 1 ) {{ + const int ir = {r} - 1; // index into pml parameters + const int ip = {v} + {u} * s{v} + ir * s{v} * s{u}; // linear index into Psi +""" + elif polarity > 0: + bounds_if = """ +if ( (s{r} - 1) > {r} && {r} > (s{r} - 1) - ({th} + 1) ) {{ + const int ir = (s{r} - 1) - ({r} + 1); // index into pml parameters + const int ip = {v} + {u} * s{v} + ir * s{v} * s{u}; // linear index into Psi +""" + else: + raise Exception('Bad polarity (=0)') + + code_e = """ + Psi_{r}{np}_E{u}[ip] = p0e(ir) * Psi_{r}{np}_E{u}[ip] + p1e(ir) * (H{v}[i] - H{v}[i-di{r}]); + Psi_{r}{np}_E{v}[ip] = p0e(ir) * Psi_{r}{np}_E{v}[ip] + p1e(ir) * (H{u}[i] - H{u}[i-di{r}]); + + E{u}[i] {se}= dt / eps{u}[i] * Psi_{r}{np}_E{u}[ip]; + E{v}[i] {sh}= dt / eps{v}[i] * Psi_{r}{np}_E{v}[ip]; +}} +""" + code_h = """ + Psi_{r}{np}_H{u}[ip] = p0h(ir) * Psi_{r}{np}_H{u}[ip] + p1h(ir) * (E{v}[i+di{r}] - E{v}[i]); + Psi_{r}{np}_H{v}[ip] = p0h(ir) * Psi_{r}{np}_H{v}[ip] + p1h(ir) * (E{u}[i+di{r}] - E{u}[i]); + + H{u}[i] {sh}= dt * Psi_{r}{np}_H{u}[ip]; + H{v}[i] {se}= dt * Psi_{r}{np}_H{v}[ip]; +}} +""" + + pml_data = { + 'E': (bounds_if + code_e).format(**vals), + 'H': (bounds_if + code_h).format(**vals), + 'psi_E': ['Psi_{r}{np}_E{u}'.format(**vals), + 'Psi_{r}{np}_E{v}'.format(**vals)], + 'psi_H': ['Psi_{r}{np}_H{u}'.format(**vals), + 'Psi_{r}{np}_H{v}'.format(**vals)], + 'tables': create_table('p0e', p0e) + \ + create_table('p1e', p1e) + \ + create_table('p0h', p0h) + \ + create_table('p1h', p1h) + + } + + return pml_data diff --git a/fdtd/simulation.py b/fdtd/simulation.py new file mode 100644 index 0000000..d3a33df --- /dev/null +++ b/fdtd/simulation.py @@ -0,0 +1,209 @@ +""" +Class for constructing and holding the basic FDTD operations and fields +""" + +from typing import List, Dict, Callable +import numpy +import warnings + +import pyopencl +import pyopencl.array +from pyopencl.elementwise import ElementwiseKernel + +from . import boundary, base +from .base import type_to_C + + +class Simulation(object): + """ + Constructs and holds the basic FDTD operations and related fields + """ + E = None # type: List[pyopencl.array.Array] + H = None # type: List[pyopencl.array.Array] + eps = None # type: List[pyopencl.array.Array] + dt = None # type: float + + arg_type = None # type: numpy.float32 or numpy.float64 + + context = None # type: pyopencl.Context + queue = None # type: pyopencl.CommandQueue + + update_E = None # type: Callable[[],pyopencl.Event] + update_H = None # type: Callable[[],pyopencl.Event] + + conductor_E = None # type: Callable[[],pyopencl.Event] + conductor_H = None # type: Callable[[],pyopencl.Event] + + cpml_E = None # type: Callable[[],pyopencl.Event] + cpml_H = None # type: Callable[[],pyopencl.Event] + + cpml_psi_E = None # type: Dict[str, pyopencl.array.Array] + cpml_psi_H = None # type: Dict[str, pyopencl.array.Array] + + def __init__(self, + epsilon: List[numpy.ndarray], + dt: float=.99/numpy.sqrt(3), + initial_E: List[numpy.ndarray]=None, + initial_H: List[numpy.ndarray]=None, + context: pyopencl.Context=None, + queue: pyopencl.CommandQueue=None, + float_type: numpy.float32 or numpy.float64=numpy.float32): + """ + Initialize the simulation. + + :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray + spanning the simulation domain. Relative epsilon is used. + :param dt: Time step. Default is the Courant factor. + :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon. + :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon. + :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. + :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. + :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. + """ + + if len(epsilon) != 3: + Exception('Epsilon must be a list with length of 3') + if not all((e.shape == epsilon[0].shape for e in epsilon[1:])): + Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) + + if context is None: + self.context = pyopencl.create_some_context(False) + else: + self.context = context + + if queue is None: + self.queue = pyopencl.CommandQueue(self.context) + else: + self.queue = queue + + if dt > .99/numpy.sqrt(3): + warnings.warn('Warning: unstable dt: {}'.format(dt)) + elif dt <= 0: + raise Exception('Invalid dt: {}'.format(dt)) + else: + self.dt = dt + + self.arg_type = float_type + + self.eps = [pyopencl.array.to_device(self.queue, e.astype(float_type)) for e in epsilon] + + if initial_E is None: + self.E = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)] + else: + if len(initial_E) != 3: + Exception('Initial_E must be a list of length 3') + if not all((E.shape == epsilon[0].shape for E in initial_E)): + Exception('Initial_E list elements must have same shape as epsilon elements') + self.E = [pyopencl.array.to_device(self.queue, E.astype(float_type)) for E in initial_E] + + if initial_H is None: + self.H = [pyopencl.array.zeros_like(self.eps[0]) for _ in range(3)] + else: + if len(initial_H) != 3: + Exception('Initial_H must be a list of length 3') + if not all((H.shape == epsilon[0].shape for H in initial_H)): + Exception('Initial_H list elements must have same shape as epsilon elements') + self.H = [pyopencl.array.to_device(self.queue, H.astype(float_type)) for H in initial_H] + + ctype = type_to_C(self.arg_type) + E_args = [ctype + ' *E' + c for c in 'xyz'] + H_args = [ctype + ' *H' + c for c in 'xyz'] + eps_args = [ctype + ' *eps' + c for c in 'xyz'] + dt_arg = [ctype + ' dt'] + + sxyz = base.shape_source(epsilon[0].shape) + E_source = sxyz + base.dixyz_source + base.maxwell_E_source + H_source = sxyz + base.dixyz_source + base.maxwell_H_source + + E_update = ElementwiseKernel(self.context, operation=E_source, + arguments=', '.join(E_args + H_args + dt_arg + eps_args)) + + H_update = ElementwiseKernel(self.context, operation=H_source, + arguments=', '.join(E_args + H_args + dt_arg)) + + self.update_E = lambda e: E_update(*self.E, *self.H, self.dt, *self.eps, wait_for=e) + self.update_H = lambda e: H_update(*self.E, *self.H, self.dt, wait_for=e) + + def init_cpml(self, pml_args: List[Dict]): + """ + Initialize absorbing layers (cpml: complex phase matched layer). PMLs are not actual + boundary conditions, so you should add a conducting boundary (.init_conductors()) for + all directions in which you add PMLs. + Allows use of self.cpml_E(events) and self.cpml_H(events). + All necessary additional fields are created on the opencl device. + + :param pml_args: A list containing dictionaries which are passed to .boundary.cpml(...). + The dt argument is set automatically, but the others must be passed in each entry + of pml_args. + """ + sxyz = base.shape_source(self.eps[0].shape) + + # Prepare per-iteration constants for later use + pml_E_source = sxyz + base.dixyz_source + base.xyz_source + pml_H_source = sxyz + base.dixyz_source + base.xyz_source + + psi_E = [] + psi_H = [] + psi_E_names = [] + psi_H_names = [] + for arg_set in pml_args: + pml_data = boundary.cpml(dt=self.dt, **arg_set) + + pml_E_source += pml_data['E'] + pml_H_source += pml_data['H'] + + ti = numpy.delete(range(3), arg_set['direction']) + trans = [self.eps[0].shape[i] for i in ti] + psi_shape = (8, trans[0], trans[1]) + + psi_E += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type) + for _ in pml_data['psi_E']] + psi_H += [pyopencl.array.zeros(self.queue, psi_shape, dtype=self.arg_type) + for _ in pml_data['psi_H']] + + psi_E_names += pml_data['psi_E'] + psi_H_names += pml_data['psi_H'] + + ctype = type_to_C(self.arg_type) + E_args = [ctype + ' *E' + c for c in 'xyz'] + H_args = [ctype + ' *H' + c for c in 'xyz'] + eps_args = [ctype + ' *eps' + c for c in 'xyz'] + dt_arg = [ctype + ' dt'] + arglist_E = [ctype + ' *' + psi for psi in psi_E_names] + arglist_H = [ctype + ' *' + psi for psi in psi_H_names] + pml_E_args = ', '.join(E_args + H_args + dt_arg + eps_args + arglist_E) + pml_H_args = ', '.join(E_args + H_args + dt_arg + arglist_H) + + pml_E = ElementwiseKernel(self.context, preamble=pml_data['tables'], arguments=pml_E_args, operation=pml_E_source) + pml_H = ElementwiseKernel(self.context, preamble=pml_data['tables'], arguments=pml_H_args, operation=pml_H_source) + + self.cpml_E = lambda e: pml_E(*self.E, *self.H, self.dt, *self.eps, *psi_E, wait_for=e) + self.cpml_H = lambda e: pml_H(*self.E, *self.H, self.dt, *psi_H, wait_for=e) + self.cmpl_psi_E = {k: v for k, v in zip(psi_E_names, psi_E)} + self.cmpl_psi_H = {k: v for k, v in zip(psi_H_names, psi_H)} + + def init_conductors(self, conductor_args: List[Dict]): + """ + Initialize reflecting boundary conditions. + Allows use of self.conductor_E(events) and self.conductor_H(events). + + :param conductor_args: List of dictionaries with which to call .boundary.conductor(...). + """ + + sxyz = base.shape_source(self.eps[0].shape) + + # Prepare per-iteration constants + bc_E_source = sxyz + base.dixyz_source + base.xyz_source + bc_H_source = sxyz + base.dixyz_source + base.xyz_source + for arg_set in conductor_args: + [e, h] = boundary.conductor(**arg_set) + bc_E_source += e + bc_H_source += h + + E_args = [type_to_C(self.arg_type) + ' *E' + c for c in 'xyz'] + H_args = [type_to_C(self.arg_type) + ' *H' + c for c in 'xyz'] + bc_E = ElementwiseKernel(self.context, arguments=E_args, operation=bc_E_source) + bc_H = ElementwiseKernel(self.context, arguments=H_args, operation=bc_H_source) + + self.conductor_E = lambda e: bc_E(*self.E, wait_for=e) + self.conductor_H = lambda e: bc_H(*self.H, wait_for=e) diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py deleted file mode 100644 index b621c19..0000000 --- a/opencl_fdtd/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .simulation import Simulation, type_to_C - -__author__ = 'Jan Petykiewicz' - -version = '0.4' diff --git a/opencl_fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl deleted file mode 100644 index 769c248..0000000 --- a/opencl_fdtd/kernels/common.cl +++ /dev/null @@ -1,85 +0,0 @@ -{# -/* Common code for E, H updates - * - * Template parameters: - * ftype type name (e.g. float or double) - * shape list of 3 ints specifying shape of fields - */ -#} - -typedef {{ftype}} ftype; - - -/* - * Field size info - */ -const size_t sx = {{shape[0]}}; -const size_t sy = {{shape[1]}}; -const size_t sz = {{shape[2]}}; -const size_t field_size = sx * sy * sz; - -//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if -// i is outside the bounds of Ex[]. -if (i >= field_size) { - PYOPENCL_ELWISE_CONTINUE; -} - - -/* - * Array indexing - */ -// Given a linear index i and shape (sx, sy, sz), defines x, y, and z -// as the 3D indices of the current element (i). -// (ie, converts linear index [i] to field indices (x, y, z) -const size_t x = i / (sz * sy); -const size_t y = (i - x * sz * sy) / sz; -const size_t z = (i - y * sz - x * sz * sy); - -// Calculate linear index offsets corresponding to offsets in 3D -// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z) -const size_t dix = sz * sy; -const size_t diy = sz; -const size_t diz = 1; - - -/* - * Pointer math - */ -//Pointer offsets into the components of a linearized vector-field -// (eg. Hx = H + XX, where H and Hx are pointers) -const size_t XX = 0; -const size_t YY = field_size; -const size_t ZZ = field_size * 2; - -//Define pointers to vector components of each field (eg. Hx = H + XX) -__global ftype *Ex = E + XX; -__global ftype *Ey = E + YY; -__global ftype *Ez = E + ZZ; - -__global ftype *Hx = H + XX; -__global ftype *Hy = H + YY; -__global ftype *Hz = H + ZZ; - - -/* - * Implement periodic boundary conditions - * - * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction. - * In the event that we start at x == 0, we actually want to wrap around and grab the cell - * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix . - * - * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction. - * In the event that we start at x == (sx - 1), we actually want to wrap around and grab - * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . - */ -{% for r in 'xyz' %} -int m{{r}} = - (int) di{{r}}; -int p{{r}} = + (int) di{{r}}; -int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}}; -if ( {{r}} == 0 ) { - m{{r}} = wrap_{{r}}; -} -if ( {{r}} == s{{r}} - 1 ) { - p{{r}} = -wrap_{{r}}; -} -{% endfor %} diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl deleted file mode 100644 index 9127181..0000000 --- a/opencl_fdtd/kernels/update_e.cl +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Update E-field, including any PMLs. - * - * Template parameters: - * common_header: Rendered contents of common.cl - * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing - * axes, polarities, and thicknesses. - * uniform_dx: If grid is uniform, uniform_dx should be the grid spacing. - * Otherwise, uniform_dx should be False and [inv_dh{xyz}] arrays must be supplied as - * OpenCL args. - * - * OpenCL args: - * E, H, dt, eps, [p{012}e{np}, Psi_{xyz}{np}_E], [inv_dh{xyz}] - */ - -{{common_header}} - -//////////////////////////////////////////////////////////////////////////// - -__global ftype *epsx = eps + XX; -__global ftype *epsy = eps + YY; -__global ftype *epsz = eps + ZZ; - - -{%- if uniform_dx %} -ftype inv_dx = 1.0 / {{uniform_dx}}; -ftype inv_dy = 1.0 / {{uniform_dx}}; -ftype inv_dz = 1.0 / {{uniform_dx}}; -{%- else %} -ftype inv_dx = inv_dhx[x]; -ftype inv_dy = inv_dhy[y]; -ftype inv_dz = inv_dhz[z]; -{%- endif %} - - -/* - * Precalculate derivatives - */ -ftype dHxy = (Hx[i] - Hx[i + my]) * inv_dy; -ftype dHxz = (Hx[i] - Hx[i + mz]) * inv_dz; - -ftype dHyx = (Hy[i] - Hy[i + mx]) * inv_dx; -ftype dHyz = (Hy[i] - Hy[i + mz]) * inv_dz; - -ftype dHzx = (Hz[i] - Hz[i + mx]) * inv_dx; -ftype dHzy = (Hz[i] - Hz[i + my]) * inv_dy; - -{% for bloch in bloch_boundaries -%} - {%- set r = bloch['axis'] -%} - {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} -if ({{r}} == 0) { - ftype bloch_im = {{bloch['real']}}; - ftype bloch_re = {{bloch['imag']}}; - dH{{u ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{u}}[i] - G{{u}}[i + m{{u}}]); - dH{{v ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{v}}[i] - G{{v}}[i + m{{v}}]); -} -{%- endfor %} - - -/* - * PML Update - */ -// PML effects on E -ftype pExi = 0; -ftype pEyi = 0; -ftype pEzi = 0; - -{% for pml in pmls -%} - {%- set r = pml['axis'] -%} - {%- set p = pml['polarity'] -%} - {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} - {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%} - {%- if r != 'y' -%} - {%- set se, sh = '-', '+' -%} - {%- else -%} - {%- set se, sh = '+', '-' -%} - {%- endif -%} - -int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; - - {%- if p == 'n' %} - -if ( {{r}} < pml_{{r ~ p}}_thickness ) { - const size_t ir = {{r}}; // index into pml parameters - - {%- elif p == 'p' %} - -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { - const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters - - {%- endif %} - const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi - dH{{v ~ r}} *= p{{r}}2e{{p}}[ir]; - dH{{u ~ r}} *= p{{r}}2e{{p}}[ir]; - {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; - {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}}; - pE{{u}}i {{se}}= {{psi ~ u}}[ip]; - pE{{v}}i {{sh}}= {{psi ~ v}}[ip]; -} -{%- endfor %} - -/* - * Update E - */ -Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi); -Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi); -Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi); diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl deleted file mode 100644 index 187a18a..0000000 --- a/opencl_fdtd/kernels/update_h.cl +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Update H-field, including any PMLs. - * Also precalculate values for poynting vector if necessary. - * - * Template parameters: - * common_header: Rendered contents of common.cl - * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing - * axes, polarities, and thicknesses. - * do_poynting: Whether to precalculate poynting vector components (boolean) - * uniform_dx: If grid is uniform, uniform_dx should be the grid spacing. - * Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as - * OpenCL args. - * - * OpenCL args: - * E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] - */ - -{{common_header}} - -//////////////////////////////////////////////////////////////////////////// - -/* - * Precalculate derivatives - */ -{%- if uniform_dx %} -ftype inv_dx = 1.0 / {{uniform_dx}}; -ftype inv_dy = 1.0 / {{uniform_dx}}; -ftype inv_dz = 1.0 / {{uniform_dx}}; -{%- else %} -ftype inv_dx = inv_dex[x]; -ftype inv_dy = inv_dey[y]; -ftype inv_dz = inv_dez[z]; -{%- endif %} - - -ftype dExy = (Ex[i + py] - Ex[i]) * inv_dy; -ftype dExz = (Ex[i + pz] - Ex[i]) * inv_dz; - -ftype dEyx = (Ey[i + px] - Ey[i]) * inv_dx; -ftype dEyz = (Ey[i + pz] - Ey[i]) * inv_dz; - -ftype dEzx = (Ez[i + px] - Ez[i]) * inv_dx; -ftype dEzy = (Ez[i + py] - Ez[i]) * inv_dy; - - -{% for bloch in bloch_boundaries -%} - {%- set r = bloch['axis'] -%} - {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} -if ({{r}} == s{{r}} - 1) { - ftype bloch_re = {{bloch['real']}}; - ftype bloch_im = {{bloch['imag']}}; - dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]); - dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]); -} -{% endfor -%} - -{%- if do_poynting %} - - -/* - * Precalculate averaged E - */ -ftype aExy = Ex[i + py] + Ex[i]; -ftype aExz = Ex[i + pz] + Ex[i]; - -ftype aEyx = Ey[i + px] + Ey[i]; -ftype aEyz = Ey[i + pz] + Ey[i]; - -ftype aEzx = Ez[i + px] + Ez[i]; -ftype aEzy = Ez[i + py] + Ez[i]; -{%- endif %} - - - - -/* - * PML Update - */ -// PML contributions to H -ftype pHxi = 0; -ftype pHyi = 0; -ftype pHzi = 0; - -{% for pml in pmls -%} - {%- set r = pml['axis'] -%} - {%- set p = pml['polarity'] -%} - {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} - {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%} - {%- if r != 'y' -%} - {%- set se, sh = '-', '+' -%} - {%- else -%} - {%- set se, sh = '+', '-' -%} - {%- endif %} - -int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; - - {%- if p == 'n' %} - -if ( {{r}} < pml_{{r ~ p}}_thickness ) { - const size_t ir = {{r}}; // index into pml parameters - - {%- elif p == 'p' %} - -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { - const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters - - {%- endif %} - const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi - dE{{v ~ r}} *= p{{r}}2h{{p}}[ir]; - dE{{u ~ r}} *= p{{r}}2h{{p}}[ir]; - {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}}; - {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}}; - pH{{u}}i {{sh}}= {{psi ~ u}}[ip]; - pH{{v}}i {{se}}= {{psi ~ v}}[ip]; -} -{%- endfor %} - -/* - * Update H - */ -{% if do_poynting -%} -// Save old H for averaging -ftype Hx_old = Hx[i]; -ftype Hy_old = Hy[i]; -ftype Hz_old = Hz[i]; -{%- endif %} - -// H update equations -Hx[i] -= dt * (dEzy - dEyz - pHxi); -Hy[i] -= dt * (dExz - dEzx - pHyi); -Hz[i] -= dt * (dEyx - dExy - pHzi); - -{% if do_poynting -%} -// Average H across timesteps -ftype aHxt = Hx[i] + Hx_old; -ftype aHyt = Hy[i] + Hy_old; -ftype aHzt = Hz[i] + Hz_old; - -/* - * Calculate unscaled S components at H locations - */ -__global ftype *oSxy = oS + 0 * field_size; -__global ftype *oSyz = oS + 1 * field_size; -__global ftype *oSzx = oS + 2 * field_size; -__global ftype *oSxz = oS + 3 * field_size; -__global ftype *oSyx = oS + 4 * field_size; -__global ftype *oSzy = oS + 5 * field_size; - -oSxy[i] = aEyx * aHzt; -oSxz[i] = -aEzx * aHyt; -oSyz[i] = aEzy * aHxt; -oSyx[i] = -aExy * aHzt; -oSzx[i] = aExz * aHyt; -oSzy[i] = -aEyz * aHxt; -{%- endif -%} diff --git a/opencl_fdtd/kernels/update_j.cl b/opencl_fdtd/kernels/update_j.cl deleted file mode 100644 index 2682f39..0000000 --- a/opencl_fdtd/kernels/update_j.cl +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Update E-field from J field - * - * Template parameters: - * common_header: Rendered contents of common.cl - * - * OpenCL args: - * E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax - */ - -{{common_header}} - -//////////////////////////////////////////////////////////////////////////// - -__global ftype *Jrx = Jr + XX; -__global ftype *Jry = Jr + YY; -__global ftype *Jrz = Jr + ZZ; -__global ftype *Jix = Ji + XX; -__global ftype *Jiy = Ji + YY; -__global ftype *Jiz = Ji + ZZ; - - -if (x < xmin || y < ymin || z < zmin) { - PYOPENCL_ELWISE_CONTINUE; -} -if (x >= xmax || y >= ymax || z >= zmax) { - PYOPENCL_ELWISE_CONTINUE; -} - -Ex[i] += c * Jrx[i] + s * Jix[i]; -Ey[i] += c * Jry[i] + s * Jiy[i]; -Ez[i] += c * Jrz[i] + s * Jiz[i]; diff --git a/opencl_fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl deleted file mode 100644 index 94e061e..0000000 --- a/opencl_fdtd/kernels/update_s.cl +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Update E-field, including any PMLs. - * - * Template parameters: - * common_header: Rendered contents of common.cl - * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities - * pml_thickness: Number of cells (integer) - * - * OpenCL args: - * E, H, dt, S, oS - */ - -{{common_header}} - -////////////////////////////////////////////////////////////////////// - - -/* - * Calculate S from oS (pre-calculated components) - */ -__global ftype *Sx = S + XX; -__global ftype *Sy = S + YY; -__global ftype *Sz = S + ZZ; - -// Use unscaled S components from H locations -__global ftype *oSxy = oS + 0 * field_size; -__global ftype *oSyz = oS + 1 * field_size; -__global ftype *oSzx = oS + 2 * field_size; -__global ftype *oSxz = oS + 3 * field_size; -__global ftype *oSyx = oS + 4 * field_size; -__global ftype *oSzy = oS + 5 * field_size; - -ftype s_factor = dt * 0.125; -Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor; -Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor; -Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor; diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py deleted file mode 100644 index c5d0005..0000000 --- a/opencl_fdtd/simulation.py +++ /dev/null @@ -1,376 +0,0 @@ -""" -Class for constructing and holding the basic FDTD operations and fields -""" - -from typing import List, Dict, Callable -from collections import OrderedDict -import numpy -import jinja2 -import warnings - -import pyopencl -import pyopencl.array -from pyopencl.elementwise import ElementwiseKernel - -from fdfd_tools import vec - - -__author__ = 'Jan Petykiewicz' - - -# Create jinja2 env on module load -jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) - - -class Simulation(object): - """ - Constructs and holds the basic FDTD operations and related fields - - After constructing this object, call the (update_E, update_H, update_S) members - to perform FDTD updates on the stored (E, H, S) fields: - - pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np'] - sim = Simulation(grid.grids, do_poynting=True, pmls=pmls) - with open('sources.c', 'w') as f: - f.write('{}'.format(sim.sources)) - - for t in range(max_t): - sim.update_E([]).wait() - - # Find the linear index for the center point, for Ey - ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + \ - numpy.prod(grid.shape) * 1 - # Perturb the field (i.e., add a soft current source) - sim.E[ind] += numpy.sin(omega * t * sim.dt) - event = sim.update_H([]) - if sim.update_S: - event = sim.update_S([event]) - event.wait() - - with lzma.open('saved_simulation', 'wb') as f: - dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f) - - Code in the form - event2 = sim.update_H([event0, event1]) - indicates that the update_H operation should be prepared immediately, but wait for - event0 and event1 to occur (i.e. previous operations to finish) before starting execution. - event2 can then be used to prepare further operations to be run after update_H. - """ - E = None # type: pyopencl.array.Array - H = None # type: pyopencl.array.Array - S = None # type: pyopencl.array.Array - eps = None # type: pyopencl.array.Array - dt = None # type: float - inv_dxes = None # type: List[pyopencl.array.Array] - - arg_type = None # type: numpy.float32 or numpy.float64 - - context = None # type: pyopencl.Context - queue = None # type: pyopencl.CommandQueue - - update_E = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] - update_H = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] - update_S = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] - update_J = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] - sources = None # type: Dict[str, str] - - def __init__(self, - epsilon: List[numpy.ndarray], - pmls: List[Dict[str, int or float]], - bloch_boundaries: List[Dict[str, int or float]] = (), - dxes: List[List[numpy.ndarray]] or float = None, - dt: float = None, - initial_fields: Dict[str, List[numpy.ndarray]] = None, - context: pyopencl.Context = None, - queue: pyopencl.CommandQueue = None, - float_type: numpy.float32 or numpy.float64 = numpy.float32, - do_poynting: bool = True, - do_fieldsrc: bool = False): - """ - Initialize the simulation. - - :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray - spanning the simulation domain. Relative epsilon is used. - :param pmls: List of dicts with keys: - 'axis': One of 'x', 'y', 'z'. - 'direction': One of 'n', 'p'. - 'thickness': Number of layers, default 8. - 'epsilon_eff': Effective epsilon to match to. Default 1.0. - 'mu_eff': Effective mu to match to. Default 1.0. - 'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6. - 'm': Polynomial grading exponent. Default 3.5. - 'ma': Exponent for alpha. Default 1. - :param bloch_boundaries: List of dicts with keys: - 'axis': One of 'x', 'y', 'z'. - 'real': Real part of bloch phase factor (i.e. real(exp(i * phase))) - 'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase))) - :param dt: Time step. Default is min(dxes) * .99/sqrt(3). - :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the - specified fields (default is 0 everywhere). Fields have same format as epsilon. - :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. - :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. - :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. - :param do_poynting: If true, enables calculation of the poynting vector, S. - Poynting vector calculation adds the following computational burdens: - * During update_H, ~6 extra additions/cell are performed in order to spatially - average E and temporally average H. These quantities are multiplied - (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly). - * update_S performs a discrete cross product using the precalculated products - from update_H. This is not nice to the cache and similar to e.g. update_E - in complexity. - * GPU memory requirements are approximately doubled, since S and the intermediate - products must be stored. - """ - if initial_fields is None: - initial_fields = {} - - self.shape = epsilon[0].shape - self.arg_type = float_type - self.sources = {} - self._create_context(context, queue) - self._create_eps(epsilon) - - if dxes is None: - dxes = 1.0 - - if isinstance(dxes, (float, int)): - uniform_dx = dxes - min_dx = dxes - else: - uniform_dx = False - self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]] - min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1]) - - max_dt = min_dx * .99 / numpy.sqrt(3) - - if dt is None: - self.dt = max_dt - elif dt > max_dt: - warnings.warn('Warning: unstable dt: {}'.format(dt)) - elif dt <= 0: - raise Exception('Invalid dt: {}'.format(dt)) - else: - self.dt = dt - - self.E = self._create_field(initial_fields.get('E', None)) - self.H = self._create_field(initial_fields.get('H', None)) - if bloch_boundaries: - self.F = self._create_field(initial_fields.get('F', None)) - self.G = self._create_field(initial_fields.get('G', None)) - - for pml in pmls: - pml.setdefault('thickness', 8) - pml.setdefault('epsilon_eff', 1.0) - pml.setdefault('mu_eff', 1.0) - pml.setdefault('ln_R_per_layer', -1.6) - pml.setdefault('m', 3.5) - pml.setdefault('cfs_alpha', 0) - pml.setdefault('ma', 1) - - ctype = type_to_C(self.arg_type) - - def ptr(arg: str) -> str: - return ctype + ' *' + arg - - base_fields = OrderedDict() - base_fields[ptr('E')] = self.E - base_fields[ptr('H')] = self.H - base_fields[ctype + ' dt'] = self.dt - if uniform_dx == False: - inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz'] - for name, field in zip(inv_dx_names, self.inv_dxes): - base_fields[ptr(name)] = field - - eps_field = OrderedDict() - eps_field[ptr('eps')] = self.eps - - if bloch_boundaries: - base_fields[ptr('F')] = self.F - base_fields[ptr('G')] = self.G - - bloch_fields = OrderedDict() - bloch_fields[ptr('E')] = self.F - bloch_fields[ptr('H')] = self.G - bloch_fields[ctype + ' dt'] = self.dt - bloch_fields[ptr('F')] = self.E - bloch_fields[ptr('G')] = self.H - - common_source = jinja_env.get_template('common.cl').render( - ftype=ctype, - shape=self.shape, - ) - jinja_args = { - 'common_header': common_source, - 'pmls': pmls, - 'do_poynting': do_poynting, - 'bloch': bloch_boundaries, - 'uniform_dx': uniform_dx, - } - E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) - H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) - self.sources['E'] = E_source - self.sources['H'] = H_source - - if bloch_boundaries: - bloch_args = jinja_args.copy() - bloch_args['do_poynting'] = False - bloch_args['bloch'] = [{'axis': b['axis'], - 'real': b['imag'], - 'imag': b['real']} - for b in bloch_boundaries] - F_source = jinja_env.get_template('update_e.cl').render(**bloch_args) - G_source = jinja_env.get_template('update_h.cl').render(**bloch_args) - self.sources['F'] = F_source - self.sources['G'] = G_source - - - S_fields = OrderedDict() - if do_poynting: - S_source = jinja_env.get_template('update_s.cl').render(**jinja_args) - self.sources['S'] = S_source - - self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type) - self.S = pyopencl.array.zeros_like(self.E) - S_fields[ptr('oS')] = self.oS - S_fields[ptr('S')] = self.S - - J_fields = OrderedDict() - if do_fieldsrc: - J_source = jinja_env.get_template('update_j.cl').render(**jinja_args) - self.sources['J'] = J_source - - self.Ji = pyopencl.array.zeros_like(self.E) - self.Jr = pyopencl.array.zeros_like(self.E) - J_fields[ptr('Jr')] = self.Jr - J_fields[ptr('Ji')] = self.Ji - - - ''' - PML - ''' - pml_e_fields, pml_h_fields = self._create_pmls(pmls) - if bloch_boundaries: - pml_f_fields, pml_g_fields = self._create_pmls(pmls) - - ''' - Create operations - ''' - self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields)) - self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields)) - if do_poynting: - self.update_S = self._create_operation(S_source, (base_fields, S_fields)) - if bloch_boundaries: - self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields)) - self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields)) - if do_fieldsrc: - args = OrderedDict() - [args.update(d) for d in (base_fields, J_fields)] - var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')] - update = ElementwiseKernel(self.context, operation=J_source, - arguments=', '.join(list(args.keys()) + var_args)) - self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e) - - - def _create_pmls(self, pmls): - ctype = type_to_C(self.arg_type) - def ptr(arg: str) -> str: - return ctype + ' *' + arg - - pml_e_fields = OrderedDict() - pml_h_fields = OrderedDict() - for pml in pmls: - a = 'xyz'.find(pml['axis']) - - sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) - kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff']) - alpha_max = pml['cfs_alpha'] - - def par(x): - scaling = (x / pml['thickness']) ** pml['m'] - sigma = scaling * sigma_max - kappa = 1 + scaling * (kappa_max - 1) - alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max - p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt) - p1 = sigma / (sigma + kappa * alpha) * (p0 - 1) - p2 = 1 / kappa - return p0, p1, p2 - - xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2)) - if pml['polarity'] == 'p': - xe -= 0.5 - elif pml['polarity'] == 'n': - xh -= 0.5 - - pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh'] - for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)): - pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe) - pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph) - - uv = 'xyz'.replace(pml['axis'], '') - psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_' - psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH'] - - psi_shape = list(self.shape) - psi_shape[a] = pml['thickness'] - - for ne, nh in zip(*psi_names): - pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) - pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) - return pml_e_fields, pml_h_fields - - def _create_operation(self, source, args_fields): - args = OrderedDict() - [args.update(d) for d in args_fields] - update = ElementwiseKernel(self.context, operation=source, - arguments=', '.join(args.keys())) - return lambda e: update(*args.values(), wait_for=e) - - def _create_context(self, context: pyopencl.Context = None, - queue: pyopencl.CommandQueue = None): - if context is None: - self.context = pyopencl.create_some_context() - else: - self.context = context - - if queue is None: - self.queue = pyopencl.CommandQueue(self.context) - else: - self.queue = queue - - def _create_eps(self, epsilon: List[numpy.ndarray]): - if len(epsilon) != 3: - raise Exception('Epsilon must be a list with length of 3') - if not all((e.shape == epsilon[0].shape for e in epsilon[1:])): - raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) - if not epsilon[0].shape == self.shape: - raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape)) - self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type)) - - def _create_field(self, initial_value: List[numpy.ndarray] = None): - if initial_value is None: - return pyopencl.array.zeros_like(self.eps) - else: - if len(initial_value) != 3: - Exception('Initial field value must be a list of length 3') - if not all((f.shape == self.shape for f in initial_value)): - Exception('Initial field list elements must have same shape as epsilon elements') - return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type)) - - -def type_to_C(float_type) -> str: - """ - Returns a string corresponding to the C equivalent of a numpy type. - Only works for float16, float32, float64. - - :param float_type: e.g. numpy.float32 - :return: string containing the corresponding C type (eg. 'double') - """ - if float_type == numpy.float16: - arg_type = 'half' - elif float_type == numpy.float32: - arg_type = 'float' - elif float_type == numpy.float64: - arg_type = 'double' - else: - raise Exception('Unsupported type') - return arg_type diff --git a/requirements.txt b/requirements.txt index 6626171..6121670 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,6 @@ numpy h5py pyopencl -jinja2 --e git+https://mpxd.net/code/jan/float_raster.git@release#egg=float_raster --e git+https://mpxd.net/code/jan/gridlock.git@release#egg=gridlock --e git+https://mpxd.net/code/jan/masque.git@release#egg=masque --e git+https://mpxd.net/code/jan/fdfd_tools.git@master#egg=fdfd_tools +-e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster +-e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock +-e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque diff --git a/setup.py b/setup.py deleted file mode 100644 index b7fd3cf..0000000 --- a/setup.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 - -from setuptools import setup, find_packages -import opencl_fdtd - -with open('README.md', 'r') as f: - long_description = f.read() - -setup(name='opencl_fdtd', - version=opencl_fdtd.version, - description='OpenCL FDTD solver', - long_description=long_description, - long_description_content_type='text/markdown', - author='Jan Petykiewicz', - author_email='anewusername@gmail.com', - url='https://mpxd.net/code/jan/opencl_fdtd', - packages=find_packages(), - package_data={ - 'opencl_fdfd': ['kernels/*'] - }, - install_requires=[ - 'numpy', - 'pyopencl', - 'jinja2', - 'fdfd_tools>=0.3', - ], - extras_require={ - }, - ) -