diff --git a/.gitignore b/.gitignore index 3b93ce0..5298f42 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ .idea/ -__pycache__ *.h5 -*.pyc + +__pycache__ +*.py[cod] +build/ +dist/ +*.egg-info/ diff --git a/README.md b/README.md index fbc972b..731450b 100644 --- a/README.md +++ b/README.md @@ -27,14 +27,16 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs). * numpy * pyopencl * jinja2 +* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools) + +Optional (used for examples): * dill (for file output) -* [gridlock](https://mpxd.net/gogs/jan/gridlock) -* [masque](https://mpxd.net/gogs/jan/masque) -* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) +* [gridlock](https://mpxd.net/code/jan/gridlock) +* [masque](https://mpxd.net/code/jan/masque) To get the code, just clone this repository: ```bash -git clone https://mpxd.net/gogs/jan/opencl_fdtd.git +git clone https://mpxd.net/code/jan/opencl_fdtd.git ``` You can install the requirements and their dependencies easily with diff --git a/fdtd.py b/fdtd.py index bbdabec..99af07b 100644 --- a/fdtd.py +++ b/fdtd.py @@ -6,12 +6,13 @@ See main() for simulation setup. import sys import time +import logging import numpy -import lzma +import lzma import dill -from fdtd.simulation import Simulation +from opencl_fdtd import Simulation from masque import Pattern, shapes import gridlock import pcgen @@ -20,6 +21,9 @@ import fdfd_tools __author__ = 'Jan Petykiewicz' +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: """ @@ -120,9 +124,13 @@ def main(): eps=n_air**2, polygons=mask.as_polygons()) - print('grid shape: {}'.format(grid.shape)) + logger.info('grid shape: {}'.format(grid.shape)) # #### Create the simulation grid #### - sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) + pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness} + for a in 'xyz' for p in 'np'] + #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x'] + bloch = [] + sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch) # Source parameters and function w = 2 * numpy.pi * dx / wl @@ -133,31 +141,41 @@ def main(): def field_source(i): t0 = i * sim.dt - delay return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2) - + with open('sources.c', 'w') as f: f.write(sim.sources['E']) - f.write('\n==========================================\n') + f.write('\n====================H======================\n') f.write(sim.sources['H']) if sim.update_S: - f.write('\n==========================================\n') + f.write('\n=====================S=====================\n') f.write(sim.sources['S']) + if bloch: + f.write('\n=====================F=====================\n') + f.write(sim.sources['F']) + f.write('\n=====================G=====================\n') + f.write(sim.sources['G']) # #### Run a bunch of iterations #### # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued # immediately and run once prev_event is finished. start = time.perf_counter() for t in range(max_t): - sim.update_E([]).wait() + e = sim.update_E([]) + if bloch: + e = sim.update_F([e]) + e.wait() ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape) sim.E[ind] += field_source(t) e = sim.update_H([]) + if bloch: + e = sim.update_G([e]) if sim.update_S: e = sim.update_S([e]) e.wait() if t % 100 == 0: - print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) + logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) sys.stdout.flush() with lzma.open('saved_simulation', 'wb') as f: @@ -172,5 +190,6 @@ def main(): d['S'] = unvec(sim.S.get()) dill.dump(d, f) + if __name__ == '__main__': main() diff --git a/fdtd/__init__.py b/fdtd/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/fdtd/kernels/update_e.cl b/fdtd/kernels/update_e.cl deleted file mode 100644 index 2728b46..0000000 --- a/fdtd/kernels/update_e.cl +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Update E-field, including any PMLs. - * - * Template parameters: - * common_header: Rendered contents of common.cl - * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities - * pml_thickness: Number of cells (integer) - * - * OpenCL args: - * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] - */ - -{{common_header}} - -//////////////////////////////////////////////////////////////////////////// - -__global ftype *epsx = eps + XX; -__global ftype *epsy = eps + YY; -__global ftype *epsz = eps + ZZ; - -{% if pmls -%} -const int pml_thickness = {{pml_thickness}}; -{%- endif %} - -/* - * Precalclate derivatives - */ -ftype dHxy = Hx[i] - Hx[i + my]; -ftype dHxz = Hx[i] - Hx[i + mz]; - -ftype dHyx = Hy[i] - Hy[i + mx]; -ftype dHyz = Hy[i] - Hy[i + mz]; - -ftype dHzx = Hz[i] - Hz[i + mx]; -ftype dHzy = Hz[i] - Hz[i + my]; - -/* - * PML Update - */ -// PML effects on E -ftype pExi = 0; -ftype pEyi = 0; -ftype pEzi = 0; - -{% for r, p in pmls -%} - {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} - {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%} - {%- if r != 'y' -%} - {%- set se, sh = '-', '+' -%} - {%- else -%} - {%- set se, sh = '+', '-' -%} - {%- endif -%} - - {%- if p == 'n' %} - -if ( {{r}} < pml_thickness ) { - const size_t ir = {{r}}; // index into pml parameters - - {%- elif p == 'p' %} - -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { - const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters - - {%- endif %} - const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi - {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}}; - {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}}; - pE{{u}}i {{se}}= {{psi ~ u}}[ip]; - pE{{v}}i {{sh}}= {{psi ~ v}}[ip]; -} -{%- endfor %} - -/* - * Update E - */ -Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi); -Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi); -Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi); diff --git a/fdtd/simulation.py b/fdtd/simulation.py deleted file mode 100644 index f1a8dec..0000000 --- a/fdtd/simulation.py +++ /dev/null @@ -1,238 +0,0 @@ -""" -Class for constructing and holding the basic FDTD operations and fields -""" - -from typing import List, Dict, Callable -from collections import OrderedDict -import numpy -import jinja2 -import warnings - -import pyopencl -import pyopencl.array -from pyopencl.elementwise import ElementwiseKernel - -from fdfd_tools import vec - - -__author__ = 'Jan Petykiewicz' - - -# Create jinja2 env on module load -jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) - - -class Simulation(object): - """ - Constructs and holds the basic FDTD operations and related fields - """ - E = None # type: List[pyopencl.array.Array] - H = None # type: List[pyopencl.array.Array] - S = None # type: List[pyopencl.array.Array] - eps = None # type: List[pyopencl.array.Array] - dt = None # type: float - - arg_type = None # type: numpy.float32 or numpy.float64 - - context = None # type: pyopencl.Context - queue = None # type: pyopencl.CommandQueue - - update_E = None # type: Callable[[],pyopencl.Event] - update_H = None # type: Callable[[],pyopencl.Event] - update_S = None # type: Callable[[],pyopencl.Event] - sources = None # type: Dict[str, str] - - def __init__(self, - epsilon: List[numpy.ndarray], - dt: float = .99/numpy.sqrt(3), - initial_E: List[numpy.ndarray] = None, - initial_H: List[numpy.ndarray] = None, - context: pyopencl.Context = None, - queue: pyopencl.CommandQueue = None, - float_type: numpy.float32 or numpy.float64 = numpy.float32, - pml_thickness: int = 10, - pmls: List[List[str]] = None, - do_poynting: bool = True): - """ - Initialize the simulation. - - :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray - spanning the simulation domain. Relative epsilon is used. - :param dt: Time step. Default is the Courant factor. - :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon. - :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon. - :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. - :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. - :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. - """ - - if len(epsilon) != 3: - Exception('Epsilon must be a list with length of 3') - if not all((e.shape == epsilon[0].shape for e in epsilon[1:])): - Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) - - if context is None: - self.context = pyopencl.create_some_context() - else: - self.context = context - - if queue is None: - self.queue = pyopencl.CommandQueue(self.context) - else: - self.queue = queue - - if dt > .99/numpy.sqrt(3): - warnings.warn('Warning: unstable dt: {}'.format(dt)) - elif dt <= 0: - raise Exception('Invalid dt: {}'.format(dt)) - else: - self.dt = dt - - self.arg_type = float_type - self.sources = {} - self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type)) - - if initial_E is None: - self.E = pyopencl.array.zeros_like(self.eps) - else: - if len(initial_E) != 3: - Exception('Initial_E must be a list of length 3') - if not all((E.shape == epsilon[0].shape for E in initial_E)): - Exception('Initial_E list elements must have same shape as epsilon elements') - self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type)) - - if initial_H is None: - self.H = pyopencl.array.zeros_like(self.eps) - else: - if len(initial_H) != 3: - Exception('Initial_H must be a list of length 3') - if not all((H.shape == epsilon[0].shape for H in initial_H)): - Exception('Initial_H list elements must have same shape as epsilon elements') - self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type)) - - if pmls is None: - pmls = [[d, p] for d in 'xyz' for p in 'np'] - - ctype = type_to_C(self.arg_type) - - def ptr(arg: str) -> str: - return ctype + ' *' + arg - - base_fields = OrderedDict() - base_fields[ptr('E')] = self.E - base_fields[ptr('H')] = self.H - base_fields[ctype + ' dt'] = self.dt - - eps_field = OrderedDict() - eps_field[ptr('eps')] = self.eps - - common_source = jinja_env.get_template('common.cl').render( - ftype=ctype, - shape=epsilon[0].shape, - ) - jinja_args = { - 'common_header': common_source, - 'pml_thickness': pml_thickness, - 'pmls': pmls, - 'do_poynting': do_poynting, - } - E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) - H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) - - self.sources['E'] = E_source - self.sources['H'] = H_source - - if do_poynting: - S_source = jinja_env.get_template('update_s.cl').render(**jinja_args) - self.sources['S'] = S_source - - self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type) - self.S = pyopencl.array.zeros_like(self.E) - S_fields = OrderedDict() - S_fields[ptr('oS')] = self.oS - S_fields[ptr('S')] = self.S - else: - S_fields = OrderedDict() - - ''' - PML - ''' - m = (3.5, 1) - sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0) - alpha_max = 0 # TODO: Decide what to do about non-zero alpha - - def par(x): - sigma = ((x / pml_thickness) ** m[0]) * sigma_max - alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max - p0 = numpy.exp(-(sigma + alpha) * dt) - p1 = sigma / (sigma + alpha) * (p0 - 1) - return p0, p1 - - xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4)) - xep -= 0.5 - xhn -= 0.5 - - pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh'] - pml_e_fields = OrderedDict() - pml_h_fields = OrderedDict() - for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)): - pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe) - pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph) - - for pml in pmls: - uv = 'xyz'.replace(pml[0], '') - psi_base = 'Psi_' + ''.join(pml) + '_' - psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH'] - - psi_shape = list(epsilon[0].shape) - psi_shape['xyz'.find(pml[0])] = pml_thickness - - for ne, nh in zip(*psi_names): - pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) - pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) - - self.pml_e_fields = pml_e_fields - self.pml_h_fields = pml_h_fields - - - ''' - Create operations - ''' - E_args = OrderedDict() - [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)] - E_update = ElementwiseKernel(self.context, operation=E_source, - arguments=', '.join(E_args.keys())) - - H_args = OrderedDict() - [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)] - H_update = ElementwiseKernel(self.context, operation=H_source, - arguments=', '.join(H_args.keys())) - self.update_E = lambda e: E_update(*E_args.values(), wait_for=e) - self.update_H = lambda e: H_update(*H_args.values(), wait_for=e) - - if do_poynting: - S_args = OrderedDict() - [S_args.update(d) for d in (base_fields, S_fields)] - S_update = ElementwiseKernel(self.context, operation=S_source, - arguments=', '.join(S_args.keys())) - - self.update_S = lambda e: S_update(*S_args.values(), wait_for=e) - - -def type_to_C(float_type) -> str: - """ - Returns a string corresponding to the C equivalent of a numpy type. - Only works for float16, float32, float64. - - :param float_type: e.g. numpy.float32 - :return: string containing the corresponding C type (eg. 'double') - """ - if float_type == numpy.float16: - arg_type = 'half' - elif float_type == numpy.float32: - arg_type = 'float' - elif float_type == numpy.float64: - arg_type = 'double' - else: - raise Exception('Unsupported type') - return arg_type diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py new file mode 100644 index 0000000..b621c19 --- /dev/null +++ b/opencl_fdtd/__init__.py @@ -0,0 +1,5 @@ +from .simulation import Simulation, type_to_C + +__author__ = 'Jan Petykiewicz' + +version = '0.4' diff --git a/fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl similarity index 91% rename from fdtd/kernels/common.cl rename to opencl_fdtd/kernels/common.cl index d203ca7..769c248 100644 --- a/fdtd/kernels/common.cl +++ b/opencl_fdtd/kernels/common.cl @@ -2,7 +2,7 @@ /* Common code for E, H updates * * Template parameters: - * ftype type name (e.g. float or double) + * ftype type name (e.g. float or double) * shape list of 3 ints specifying shape of fields */ #} @@ -73,12 +73,13 @@ __global ftype *Hz = H + ZZ; * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . */ {% for r in 'xyz' %} -int m{{r}} = -di{{r}}; -int p{{r}} = +di{{r}}; -int wrap_{{r}} = (s{{r}} - 1) * di{{r}}; +int m{{r}} = - (int) di{{r}}; +int p{{r}} = + (int) di{{r}}; +int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}}; if ( {{r}} == 0 ) { m{{r}} = wrap_{{r}}; -} else if ( {{r}} == s{{r}} - 1 ) { +} +if ( {{r}} == s{{r}} - 1 ) { p{{r}} = -wrap_{{r}}; -} +} {% endfor %} diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl new file mode 100644 index 0000000..9127181 --- /dev/null +++ b/opencl_fdtd/kernels/update_e.cl @@ -0,0 +1,107 @@ +/* + * Update E-field, including any PMLs. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing + * axes, polarities, and thicknesses. + * uniform_dx: If grid is uniform, uniform_dx should be the grid spacing. + * Otherwise, uniform_dx should be False and [inv_dh{xyz}] arrays must be supplied as + * OpenCL args. + * + * OpenCL args: + * E, H, dt, eps, [p{012}e{np}, Psi_{xyz}{np}_E], [inv_dh{xyz}] + */ + +{{common_header}} + +//////////////////////////////////////////////////////////////////////////// + +__global ftype *epsx = eps + XX; +__global ftype *epsy = eps + YY; +__global ftype *epsz = eps + ZZ; + + +{%- if uniform_dx %} +ftype inv_dx = 1.0 / {{uniform_dx}}; +ftype inv_dy = 1.0 / {{uniform_dx}}; +ftype inv_dz = 1.0 / {{uniform_dx}}; +{%- else %} +ftype inv_dx = inv_dhx[x]; +ftype inv_dy = inv_dhy[y]; +ftype inv_dz = inv_dhz[z]; +{%- endif %} + + +/* + * Precalculate derivatives + */ +ftype dHxy = (Hx[i] - Hx[i + my]) * inv_dy; +ftype dHxz = (Hx[i] - Hx[i + mz]) * inv_dz; + +ftype dHyx = (Hy[i] - Hy[i + mx]) * inv_dx; +ftype dHyz = (Hy[i] - Hy[i + mz]) * inv_dz; + +ftype dHzx = (Hz[i] - Hz[i + mx]) * inv_dx; +ftype dHzy = (Hz[i] - Hz[i + my]) * inv_dy; + +{% for bloch in bloch_boundaries -%} + {%- set r = bloch['axis'] -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} +if ({{r}} == 0) { + ftype bloch_im = {{bloch['real']}}; + ftype bloch_re = {{bloch['imag']}}; + dH{{u ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{u}}[i] - G{{u}}[i + m{{u}}]); + dH{{v ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{v}}[i] - G{{v}}[i + m{{v}}]); +} +{%- endfor %} + + +/* + * PML Update + */ +// PML effects on E +ftype pExi = 0; +ftype pEyi = 0; +ftype pEzi = 0; + +{% for pml in pmls -%} + {%- set r = pml['axis'] -%} + {%- set p = pml['polarity'] -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} + {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%} + {%- if r != 'y' -%} + {%- set se, sh = '-', '+' -%} + {%- else -%} + {%- set se, sh = '+', '-' -%} + {%- endif -%} + +int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; + + {%- if p == 'n' %} + +if ( {{r}} < pml_{{r ~ p}}_thickness ) { + const size_t ir = {{r}}; // index into pml parameters + + {%- elif p == 'p' %} + +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { + const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters + + {%- endif %} + const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + dH{{v ~ r}} *= p{{r}}2e{{p}}[ir]; + dH{{u ~ r}} *= p{{r}}2e{{p}}[ir]; + {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; + {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}}; + pE{{u}}i {{se}}= {{psi ~ u}}[ip]; + pE{{v}}i {{sh}}= {{psi ~ v}}[ip]; +} +{%- endfor %} + +/* + * Update E + */ +Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi); +Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi); +Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi); diff --git a/fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl similarity index 53% rename from fdtd/kernels/update_h.cl rename to opencl_fdtd/kernels/update_h.cl index e093cb9..187a18a 100644 --- a/fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -1,36 +1,58 @@ /* * Update H-field, including any PMLs. * Also precalculate values for poynting vector if necessary. - * + * * Template parameters: * common_header: Rendered contents of common.cl - * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities - * pml_thickness: Number of cells (integer) - * do_poynting: Whether to precalculate poynting vector components (boolean) + * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing + * axes, polarities, and thicknesses. + * do_poynting: Whether to precalculate poynting vector components (boolean) + * uniform_dx: If grid is uniform, uniform_dx should be the grid spacing. + * Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as + * OpenCL args. * * OpenCL args: - * E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS] + * E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] */ {{common_header}} //////////////////////////////////////////////////////////////////////////// -{% if pmls -%} -const int pml_thickness = {{pml_thickness}}; -{%- endif %} - /* * Precalculate derivatives */ -ftype dExy = Ex[i + py] - Ex[i]; -ftype dExz = Ex[i + pz] - Ex[i]; +{%- if uniform_dx %} +ftype inv_dx = 1.0 / {{uniform_dx}}; +ftype inv_dy = 1.0 / {{uniform_dx}}; +ftype inv_dz = 1.0 / {{uniform_dx}}; +{%- else %} +ftype inv_dx = inv_dex[x]; +ftype inv_dy = inv_dey[y]; +ftype inv_dz = inv_dez[z]; +{%- endif %} -ftype dEyx = Ey[i + px] - Ey[i]; -ftype dEyz = Ey[i + pz] - Ey[i]; -ftype dEzx = Ez[i + px] - Ez[i]; -ftype dEzy = Ez[i + py] - Ez[i]; +ftype dExy = (Ex[i + py] - Ex[i]) * inv_dy; +ftype dExz = (Ex[i + pz] - Ex[i]) * inv_dz; + +ftype dEyx = (Ey[i + px] - Ey[i]) * inv_dx; +ftype dEyz = (Ey[i + pz] - Ey[i]) * inv_dz; + +ftype dEzx = (Ez[i + px] - Ez[i]) * inv_dx; +ftype dEzy = (Ez[i + py] - Ez[i]) * inv_dy; + + +{% for bloch in bloch_boundaries -%} + {%- set r = bloch['axis'] -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} +if ({{r}} == s{{r}} - 1) { + ftype bloch_re = {{bloch['real']}}; + ftype bloch_im = {{bloch['imag']}}; + dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]); + dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]); +} +{% endfor -%} {%- if do_poynting %} @@ -49,6 +71,8 @@ ftype aEzy = Ez[i + py] + Ez[i]; {%- endif %} + + /* * PML Update */ @@ -57,29 +81,35 @@ ftype pHxi = 0; ftype pHyi = 0; ftype pHzi = 0; -{%- for r, p in pmls -%} +{% for pml in pmls -%} + {%- set r = pml['axis'] -%} + {%- set p = pml['polarity'] -%} {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%} {%- if r != 'y' -%} {%- set se, sh = '-', '+' -%} {%- else -%} {%- set se, sh = '+', '-' -%} - {%- endif -%} + {%- endif %} + +int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; {%- if p == 'n' %} -if ( {{r}} < pml_thickness ) { +if ( {{r}} < pml_{{r ~ p}}_thickness ) { const size_t ir = {{r}}; // index into pml parameters {%- elif p == 'p' %} -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters {%- endif %} const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi - {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}}; - {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}}; + dE{{v ~ r}} *= p{{r}}2h{{p}}[ir]; + dE{{u ~ r}} *= p{{r}}2h{{p}}[ir]; + {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}}; + {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}}; pH{{u}}i {{sh}}= {{psi ~ u}}[ip]; pH{{v}}i {{se}}= {{psi ~ v}}[ip]; } @@ -96,9 +126,9 @@ ftype Hz_old = Hz[i]; {%- endif %} // H update equations -Hx[i] -= dt * (dEzy - dEyz + pHxi); -Hy[i] -= dt * (dExz - dEzx + pHyi); -Hz[i] -= dt * (dEyx - dExy + pHzi); +Hx[i] -= dt * (dEzy - dEyz - pHxi); +Hy[i] -= dt * (dExz - dEzx - pHyi); +Hz[i] -= dt * (dEyx - dExy - pHzi); {% if do_poynting -%} // Average H across timesteps diff --git a/opencl_fdtd/kernels/update_j.cl b/opencl_fdtd/kernels/update_j.cl new file mode 100644 index 0000000..2682f39 --- /dev/null +++ b/opencl_fdtd/kernels/update_j.cl @@ -0,0 +1,32 @@ +/* + * Update E-field from J field + * + * Template parameters: + * common_header: Rendered contents of common.cl + * + * OpenCL args: + * E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax + */ + +{{common_header}} + +//////////////////////////////////////////////////////////////////////////// + +__global ftype *Jrx = Jr + XX; +__global ftype *Jry = Jr + YY; +__global ftype *Jrz = Jr + ZZ; +__global ftype *Jix = Ji + XX; +__global ftype *Jiy = Ji + YY; +__global ftype *Jiz = Ji + ZZ; + + +if (x < xmin || y < ymin || z < zmin) { + PYOPENCL_ELWISE_CONTINUE; +} +if (x >= xmax || y >= ymax || z >= zmax) { + PYOPENCL_ELWISE_CONTINUE; +} + +Ex[i] += c * Jrx[i] + s * Jix[i]; +Ey[i] += c * Jry[i] + s * Jiy[i]; +Ez[i] += c * Jrz[i] + s * Jiz[i]; diff --git a/fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl similarity index 94% rename from fdtd/kernels/update_s.cl rename to opencl_fdtd/kernels/update_s.cl index 0310411..94e061e 100644 --- a/fdtd/kernels/update_s.cl +++ b/opencl_fdtd/kernels/update_s.cl @@ -1,11 +1,11 @@ /* * Update E-field, including any PMLs. - * + * * Template parameters: * common_header: Rendered contents of common.cl * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities * pml_thickness: Number of cells (integer) - * + * * OpenCL args: * E, H, dt, S, oS */ @@ -17,12 +17,12 @@ /* * Calculate S from oS (pre-calculated components) - */ + */ __global ftype *Sx = S + XX; __global ftype *Sy = S + YY; __global ftype *Sz = S + ZZ; -// Use unscaled S components from H locations +// Use unscaled S components from H locations __global ftype *oSxy = oS + 0 * field_size; __global ftype *oSyz = oS + 1 * field_size; __global ftype *oSzx = oS + 2 * field_size; diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py new file mode 100644 index 0000000..c5d0005 --- /dev/null +++ b/opencl_fdtd/simulation.py @@ -0,0 +1,376 @@ +""" +Class for constructing and holding the basic FDTD operations and fields +""" + +from typing import List, Dict, Callable +from collections import OrderedDict +import numpy +import jinja2 +import warnings + +import pyopencl +import pyopencl.array +from pyopencl.elementwise import ElementwiseKernel + +from fdfd_tools import vec + + +__author__ = 'Jan Petykiewicz' + + +# Create jinja2 env on module load +jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) + + +class Simulation(object): + """ + Constructs and holds the basic FDTD operations and related fields + + After constructing this object, call the (update_E, update_H, update_S) members + to perform FDTD updates on the stored (E, H, S) fields: + + pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np'] + sim = Simulation(grid.grids, do_poynting=True, pmls=pmls) + with open('sources.c', 'w') as f: + f.write('{}'.format(sim.sources)) + + for t in range(max_t): + sim.update_E([]).wait() + + # Find the linear index for the center point, for Ey + ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + \ + numpy.prod(grid.shape) * 1 + # Perturb the field (i.e., add a soft current source) + sim.E[ind] += numpy.sin(omega * t * sim.dt) + event = sim.update_H([]) + if sim.update_S: + event = sim.update_S([event]) + event.wait() + + with lzma.open('saved_simulation', 'wb') as f: + dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f) + + Code in the form + event2 = sim.update_H([event0, event1]) + indicates that the update_H operation should be prepared immediately, but wait for + event0 and event1 to occur (i.e. previous operations to finish) before starting execution. + event2 can then be used to prepare further operations to be run after update_H. + """ + E = None # type: pyopencl.array.Array + H = None # type: pyopencl.array.Array + S = None # type: pyopencl.array.Array + eps = None # type: pyopencl.array.Array + dt = None # type: float + inv_dxes = None # type: List[pyopencl.array.Array] + + arg_type = None # type: numpy.float32 or numpy.float64 + + context = None # type: pyopencl.Context + queue = None # type: pyopencl.CommandQueue + + update_E = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + update_H = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + update_S = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + update_J = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + sources = None # type: Dict[str, str] + + def __init__(self, + epsilon: List[numpy.ndarray], + pmls: List[Dict[str, int or float]], + bloch_boundaries: List[Dict[str, int or float]] = (), + dxes: List[List[numpy.ndarray]] or float = None, + dt: float = None, + initial_fields: Dict[str, List[numpy.ndarray]] = None, + context: pyopencl.Context = None, + queue: pyopencl.CommandQueue = None, + float_type: numpy.float32 or numpy.float64 = numpy.float32, + do_poynting: bool = True, + do_fieldsrc: bool = False): + """ + Initialize the simulation. + + :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray + spanning the simulation domain. Relative epsilon is used. + :param pmls: List of dicts with keys: + 'axis': One of 'x', 'y', 'z'. + 'direction': One of 'n', 'p'. + 'thickness': Number of layers, default 8. + 'epsilon_eff': Effective epsilon to match to. Default 1.0. + 'mu_eff': Effective mu to match to. Default 1.0. + 'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6. + 'm': Polynomial grading exponent. Default 3.5. + 'ma': Exponent for alpha. Default 1. + :param bloch_boundaries: List of dicts with keys: + 'axis': One of 'x', 'y', 'z'. + 'real': Real part of bloch phase factor (i.e. real(exp(i * phase))) + 'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase))) + :param dt: Time step. Default is min(dxes) * .99/sqrt(3). + :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the + specified fields (default is 0 everywhere). Fields have same format as epsilon. + :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. + :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. + :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. + :param do_poynting: If true, enables calculation of the poynting vector, S. + Poynting vector calculation adds the following computational burdens: + * During update_H, ~6 extra additions/cell are performed in order to spatially + average E and temporally average H. These quantities are multiplied + (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly). + * update_S performs a discrete cross product using the precalculated products + from update_H. This is not nice to the cache and similar to e.g. update_E + in complexity. + * GPU memory requirements are approximately doubled, since S and the intermediate + products must be stored. + """ + if initial_fields is None: + initial_fields = {} + + self.shape = epsilon[0].shape + self.arg_type = float_type + self.sources = {} + self._create_context(context, queue) + self._create_eps(epsilon) + + if dxes is None: + dxes = 1.0 + + if isinstance(dxes, (float, int)): + uniform_dx = dxes + min_dx = dxes + else: + uniform_dx = False + self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]] + min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1]) + + max_dt = min_dx * .99 / numpy.sqrt(3) + + if dt is None: + self.dt = max_dt + elif dt > max_dt: + warnings.warn('Warning: unstable dt: {}'.format(dt)) + elif dt <= 0: + raise Exception('Invalid dt: {}'.format(dt)) + else: + self.dt = dt + + self.E = self._create_field(initial_fields.get('E', None)) + self.H = self._create_field(initial_fields.get('H', None)) + if bloch_boundaries: + self.F = self._create_field(initial_fields.get('F', None)) + self.G = self._create_field(initial_fields.get('G', None)) + + for pml in pmls: + pml.setdefault('thickness', 8) + pml.setdefault('epsilon_eff', 1.0) + pml.setdefault('mu_eff', 1.0) + pml.setdefault('ln_R_per_layer', -1.6) + pml.setdefault('m', 3.5) + pml.setdefault('cfs_alpha', 0) + pml.setdefault('ma', 1) + + ctype = type_to_C(self.arg_type) + + def ptr(arg: str) -> str: + return ctype + ' *' + arg + + base_fields = OrderedDict() + base_fields[ptr('E')] = self.E + base_fields[ptr('H')] = self.H + base_fields[ctype + ' dt'] = self.dt + if uniform_dx == False: + inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz'] + for name, field in zip(inv_dx_names, self.inv_dxes): + base_fields[ptr(name)] = field + + eps_field = OrderedDict() + eps_field[ptr('eps')] = self.eps + + if bloch_boundaries: + base_fields[ptr('F')] = self.F + base_fields[ptr('G')] = self.G + + bloch_fields = OrderedDict() + bloch_fields[ptr('E')] = self.F + bloch_fields[ptr('H')] = self.G + bloch_fields[ctype + ' dt'] = self.dt + bloch_fields[ptr('F')] = self.E + bloch_fields[ptr('G')] = self.H + + common_source = jinja_env.get_template('common.cl').render( + ftype=ctype, + shape=self.shape, + ) + jinja_args = { + 'common_header': common_source, + 'pmls': pmls, + 'do_poynting': do_poynting, + 'bloch': bloch_boundaries, + 'uniform_dx': uniform_dx, + } + E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) + H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) + self.sources['E'] = E_source + self.sources['H'] = H_source + + if bloch_boundaries: + bloch_args = jinja_args.copy() + bloch_args['do_poynting'] = False + bloch_args['bloch'] = [{'axis': b['axis'], + 'real': b['imag'], + 'imag': b['real']} + for b in bloch_boundaries] + F_source = jinja_env.get_template('update_e.cl').render(**bloch_args) + G_source = jinja_env.get_template('update_h.cl').render(**bloch_args) + self.sources['F'] = F_source + self.sources['G'] = G_source + + + S_fields = OrderedDict() + if do_poynting: + S_source = jinja_env.get_template('update_s.cl').render(**jinja_args) + self.sources['S'] = S_source + + self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type) + self.S = pyopencl.array.zeros_like(self.E) + S_fields[ptr('oS')] = self.oS + S_fields[ptr('S')] = self.S + + J_fields = OrderedDict() + if do_fieldsrc: + J_source = jinja_env.get_template('update_j.cl').render(**jinja_args) + self.sources['J'] = J_source + + self.Ji = pyopencl.array.zeros_like(self.E) + self.Jr = pyopencl.array.zeros_like(self.E) + J_fields[ptr('Jr')] = self.Jr + J_fields[ptr('Ji')] = self.Ji + + + ''' + PML + ''' + pml_e_fields, pml_h_fields = self._create_pmls(pmls) + if bloch_boundaries: + pml_f_fields, pml_g_fields = self._create_pmls(pmls) + + ''' + Create operations + ''' + self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields)) + self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields)) + if do_poynting: + self.update_S = self._create_operation(S_source, (base_fields, S_fields)) + if bloch_boundaries: + self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields)) + self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields)) + if do_fieldsrc: + args = OrderedDict() + [args.update(d) for d in (base_fields, J_fields)] + var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')] + update = ElementwiseKernel(self.context, operation=J_source, + arguments=', '.join(list(args.keys()) + var_args)) + self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e) + + + def _create_pmls(self, pmls): + ctype = type_to_C(self.arg_type) + def ptr(arg: str) -> str: + return ctype + ' *' + arg + + pml_e_fields = OrderedDict() + pml_h_fields = OrderedDict() + for pml in pmls: + a = 'xyz'.find(pml['axis']) + + sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) + kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff']) + alpha_max = pml['cfs_alpha'] + + def par(x): + scaling = (x / pml['thickness']) ** pml['m'] + sigma = scaling * sigma_max + kappa = 1 + scaling * (kappa_max - 1) + alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max + p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt) + p1 = sigma / (sigma + kappa * alpha) * (p0 - 1) + p2 = 1 / kappa + return p0, p1, p2 + + xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2)) + if pml['polarity'] == 'p': + xe -= 0.5 + elif pml['polarity'] == 'n': + xh -= 0.5 + + pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh'] + for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)): + pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe) + pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph) + + uv = 'xyz'.replace(pml['axis'], '') + psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_' + psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH'] + + psi_shape = list(self.shape) + psi_shape[a] = pml['thickness'] + + for ne, nh in zip(*psi_names): + pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) + pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) + return pml_e_fields, pml_h_fields + + def _create_operation(self, source, args_fields): + args = OrderedDict() + [args.update(d) for d in args_fields] + update = ElementwiseKernel(self.context, operation=source, + arguments=', '.join(args.keys())) + return lambda e: update(*args.values(), wait_for=e) + + def _create_context(self, context: pyopencl.Context = None, + queue: pyopencl.CommandQueue = None): + if context is None: + self.context = pyopencl.create_some_context() + else: + self.context = context + + if queue is None: + self.queue = pyopencl.CommandQueue(self.context) + else: + self.queue = queue + + def _create_eps(self, epsilon: List[numpy.ndarray]): + if len(epsilon) != 3: + raise Exception('Epsilon must be a list with length of 3') + if not all((e.shape == epsilon[0].shape for e in epsilon[1:])): + raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) + if not epsilon[0].shape == self.shape: + raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape)) + self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type)) + + def _create_field(self, initial_value: List[numpy.ndarray] = None): + if initial_value is None: + return pyopencl.array.zeros_like(self.eps) + else: + if len(initial_value) != 3: + Exception('Initial field value must be a list of length 3') + if not all((f.shape == self.shape for f in initial_value)): + Exception('Initial field list elements must have same shape as epsilon elements') + return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type)) + + +def type_to_C(float_type) -> str: + """ + Returns a string corresponding to the C equivalent of a numpy type. + Only works for float16, float32, float64. + + :param float_type: e.g. numpy.float32 + :return: string containing the corresponding C type (eg. 'double') + """ + if float_type == numpy.float16: + arg_type = 'half' + elif float_type == numpy.float32: + arg_type = 'float' + elif float_type == numpy.float64: + arg_type = 'double' + else: + raise Exception('Unsupported type') + return arg_type diff --git a/requirements.txt b/requirements.txt index 6ed651b..6626171 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy h5py pyopencl jinja2 --e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster --e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock --e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque --e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools +-e git+https://mpxd.net/code/jan/float_raster.git@release#egg=float_raster +-e git+https://mpxd.net/code/jan/gridlock.git@release#egg=gridlock +-e git+https://mpxd.net/code/jan/masque.git@release#egg=masque +-e git+https://mpxd.net/code/jan/fdfd_tools.git@master#egg=fdfd_tools diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b7fd3cf --- /dev/null +++ b/setup.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +from setuptools import setup, find_packages +import opencl_fdtd + +with open('README.md', 'r') as f: + long_description = f.read() + +setup(name='opencl_fdtd', + version=opencl_fdtd.version, + description='OpenCL FDTD solver', + long_description=long_description, + long_description_content_type='text/markdown', + author='Jan Petykiewicz', + author_email='anewusername@gmail.com', + url='https://mpxd.net/code/jan/opencl_fdtd', + packages=find_packages(), + package_data={ + 'opencl_fdfd': ['kernels/*'] + }, + install_requires=[ + 'numpy', + 'pyopencl', + 'jinja2', + 'fdfd_tools>=0.3', + ], + extras_require={ + }, + ) +