From b372130c6b91030a7cc56842a07de569b792bec4 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 16 Apr 2017 02:51:09 -0700 Subject: [PATCH 01/29] try out opencl image3d_t... it runs in the base case, no idea if correctly --- fdtd.py | 4 +-- fdtd/kernels/common.cl | 36 +++----------------------- fdtd/simulation.py | 59 ++++++++++++++++++++++++++---------------- 3 files changed, 42 insertions(+), 57 deletions(-) diff --git a/fdtd.py b/fdtd.py index bbdabec..d41a038 100644 --- a/fdtd.py +++ b/fdtd.py @@ -122,7 +122,7 @@ def main(): print('grid shape: {}'.format(grid.shape)) # #### Create the simulation grid #### - sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) + sim = Simulation(grid.grids, do_poynting=False, pmls=[]) # Source parameters and function w = 2 * numpy.pi * dx / wl @@ -150,7 +150,7 @@ def main(): sim.update_E([]).wait() ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape) - sim.E[ind] += field_source(t) +# sim.buf[ind] += field_source(t) e = sim.update_H([]) if sim.update_S: e = sim.update_S([e]) diff --git a/fdtd/kernels/common.cl b/fdtd/kernels/common.cl index d203ca7..e43336b 100644 --- a/fdtd/kernels/common.cl +++ b/fdtd/kernels/common.cl @@ -2,14 +2,10 @@ /* Common code for E, H updates * * Template parameters: - * ftype type name (e.g. float or double) * shape list of 3 ints specifying shape of fields */ #} -typedef {{ftype}} ftype; - - /* * Field size info */ @@ -18,13 +14,6 @@ const size_t sy = {{shape[1]}}; const size_t sz = {{shape[2]}}; const size_t field_size = sx * sy * sz; -//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if -// i is outside the bounds of Ex[]. -if (i >= field_size) { - PYOPENCL_ELWISE_CONTINUE; -} - - /* * Array indexing */ @@ -42,25 +31,6 @@ const size_t diy = sz; const size_t diz = 1; -/* - * Pointer math - */ -//Pointer offsets into the components of a linearized vector-field -// (eg. Hx = H + XX, where H and Hx are pointers) -const size_t XX = 0; -const size_t YY = field_size; -const size_t ZZ = field_size * 2; - -//Define pointers to vector components of each field (eg. Hx = H + XX) -__global ftype *Ex = E + XX; -__global ftype *Ey = E + YY; -__global ftype *Ez = E + ZZ; - -__global ftype *Hx = H + XX; -__global ftype *Hy = H + YY; -__global ftype *Hz = H + ZZ; - - /* * Implement periodic boundary conditions * @@ -73,9 +43,9 @@ __global ftype *Hz = H + ZZ; * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . */ {% for r in 'xyz' %} -int m{{r}} = -di{{r}}; -int p{{r}} = +di{{r}}; -int wrap_{{r}} = (s{{r}} - 1) * di{{r}}; +int m{{r}} = -1; +int p{{r}} = +1; +int wrap_{{r}} = s{{r}} - 1; if ( {{r}} == 0 ) { m{{r}} = wrap_{{r}}; } else if ( {{r}} == s{{r}} - 1 ) { diff --git a/fdtd/simulation.py b/fdtd/simulation.py index f1a8dec..9ba9b88 100644 --- a/fdtd/simulation.py +++ b/fdtd/simulation.py @@ -17,6 +17,7 @@ from fdfd_tools import vec __author__ = 'Jan Petykiewicz' +float4 = pyopencl.array.vec.float4 # Create jinja2 env on module load jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) @@ -88,12 +89,23 @@ class Simulation(object): else: self.dt = dt + def make4d(f): + g = [numpy.transpose(fi)[:,:,:,None] for fi in f] + h = g + [numpy.empty_like(g[0])] + j = numpy.concatenate(h, axis=3) + return numpy.ascontiguousarray(j, dtype=numpy.float32) + self.arg_type = float_type self.sources = {} - self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type)) + ef = make4d(epsilon).astype(float_type) + self.eps = pyopencl.image_from_array(self.context, + make4d(epsilon), num_channels=4, mode='r') + + self.buf = pyopencl.array.Array(self.queue, shape=epsilon.shape, dtype=float4) if initial_E is None: - self.E = pyopencl.array.zeros_like(self.eps) + self.E = pyopencl.image_from_array(self.context, + make4d(epsilon) * 0, num_channels=4, mode='r') else: if len(initial_E) != 3: Exception('Initial_E must be a list of length 3') @@ -102,7 +114,8 @@ class Simulation(object): self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type)) if initial_H is None: - self.H = pyopencl.array.zeros_like(self.eps) + self.H = pyopencl.image_from_array(self.context, + make4d(epsilon) * 0, num_channels=4, mode='r') else: if len(initial_H) != 3: Exception('Initial_H must be a list of length 3') @@ -119,15 +132,8 @@ class Simulation(object): return ctype + ' *' + arg base_fields = OrderedDict() - base_fields[ptr('E')] = self.E - base_fields[ptr('H')] = self.H - base_fields[ctype + ' dt'] = self.dt - - eps_field = OrderedDict() - eps_field[ptr('eps')] = self.eps common_source = jinja_env.get_template('common.cl').render( - ftype=ctype, shape=epsilon[0].shape, ) jinja_args = { @@ -136,8 +142,8 @@ class Simulation(object): 'pmls': pmls, 'do_poynting': do_poynting, } - E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) - H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) + E_source = jinja_env.get_template('update_e_full.cl').render(**jinja_args) + H_source = jinja_env.get_template('update_h_full.cl').render(**jinja_args) self.sources['E'] = E_source self.sources['H'] = H_source @@ -198,17 +204,26 @@ class Simulation(object): ''' Create operations ''' - E_args = OrderedDict() - [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)] - E_update = ElementwiseKernel(self.context, operation=E_source, - arguments=', '.join(E_args.keys())) + E_update = pyopencl.Program(self.context, E_source).build().update_e + H_update = pyopencl.Program(self.context, H_source).build().update_h + max_gs = E_update.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, + self.queue.device) + gs, ls = self.buf.get_sizes(self.queue, max_gs) + print('gs', gs, ls, max_gs) + def update_E(e): + e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.E, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e) + e = E_update(self.queue, gs, ls, self.buf.data, self.H, numpy.float32(dt), self.eps, numpy.uint32(self.buf.size), wait_for=[e]) + return e - H_args = OrderedDict() - [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)] - H_update = ElementwiseKernel(self.context, operation=H_source, - arguments=', '.join(H_args.keys())) - self.update_E = lambda e: E_update(*E_args.values(), wait_for=e) - self.update_H = lambda e: H_update(*H_args.values(), wait_for=e) + def update_H(e): + e = pyopencl.enqueue_copy(self.queue, self.E, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e) + e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.H, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e]) + e = H_update(self.queue, gs, ls, self.E, self.buf.data, numpy.float32(dt), numpy.uint32(self.buf.size), wait_for=[e]) + e = pyopencl.enqueue_copy(self.queue, self.H, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e]) + return e + + self.update_E = update_E + self.update_H = update_H if do_poynting: S_args = OrderedDict() From 97d9901e4b0356d4a8ab77d65013e7f5b73bc009 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sat, 12 Aug 2017 19:20:29 -0700 Subject: [PATCH 02/29] Use logging package for output --- fdtd.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fdtd.py b/fdtd.py index bbdabec..961bc0e 100644 --- a/fdtd.py +++ b/fdtd.py @@ -6,6 +6,7 @@ See main() for simulation setup. import sys import time +import logging import numpy import lzma @@ -20,6 +21,9 @@ import fdfd_tools __author__ = 'Jan Petykiewicz' +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern: """ @@ -120,7 +124,7 @@ def main(): eps=n_air**2, polygons=mask.as_polygons()) - print('grid shape: {}'.format(grid.shape)) + logger.info('grid shape: {}'.format(grid.shape)) # #### Create the simulation grid #### sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) @@ -157,7 +161,7 @@ def main(): e.wait() if t % 100 == 0: - print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) + logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start))) sys.stdout.flush() with lzma.open('saved_simulation', 'wb') as f: From 0d91f0d43efba28e91dc1a88b5f3acab9294b799 Mon Sep 17 00:00:00 2001 From: jan Date: Mon, 14 Aug 2017 15:41:20 -0700 Subject: [PATCH 03/29] rename lib --- fdtd.py | 7 ++++--- fdtd/__init__.py | 0 opencl_fdtd/__init__.py | 1 + {fdtd => opencl_fdtd}/kernels/common.cl | 0 {fdtd => opencl_fdtd}/kernels/update_e.cl | 0 {fdtd => opencl_fdtd}/kernels/update_h.cl | 0 {fdtd => opencl_fdtd}/kernels/update_s.cl | 0 {fdtd => opencl_fdtd}/simulation.py | 6 +++--- 8 files changed, 8 insertions(+), 6 deletions(-) delete mode 100644 fdtd/__init__.py create mode 100644 opencl_fdtd/__init__.py rename {fdtd => opencl_fdtd}/kernels/common.cl (100%) rename {fdtd => opencl_fdtd}/kernels/update_e.cl (100%) rename {fdtd => opencl_fdtd}/kernels/update_h.cl (100%) rename {fdtd => opencl_fdtd}/kernels/update_s.cl (100%) rename {fdtd => opencl_fdtd}/simulation.py (99%) diff --git a/fdtd.py b/fdtd.py index bbdabec..13af0fd 100644 --- a/fdtd.py +++ b/fdtd.py @@ -8,10 +8,10 @@ import sys import time import numpy -import lzma +import lzma import dill -from fdtd.simulation import Simulation +from opencl_fdtd import Simulation from masque import Pattern, shapes import gridlock import pcgen @@ -133,7 +133,7 @@ def main(): def field_source(i): t0 = i * sim.dt - delay return numpy.sin(w * t0) * numpy.exp(-alpha * t0**2) - + with open('sources.c', 'w') as f: f.write(sim.sources['E']) f.write('\n==========================================\n') @@ -172,5 +172,6 @@ def main(): d['S'] = unvec(sim.S.get()) dill.dump(d, f) + if __name__ == '__main__': main() diff --git a/fdtd/__init__.py b/fdtd/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py new file mode 100644 index 0000000..e3e95b4 --- /dev/null +++ b/opencl_fdtd/__init__.py @@ -0,0 +1 @@ +from .simulation import Simulation, type_to_C diff --git a/fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl similarity index 100% rename from fdtd/kernels/common.cl rename to opencl_fdtd/kernels/common.cl diff --git a/fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl similarity index 100% rename from fdtd/kernels/update_e.cl rename to opencl_fdtd/kernels/update_e.cl diff --git a/fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl similarity index 100% rename from fdtd/kernels/update_h.cl rename to opencl_fdtd/kernels/update_h.cl diff --git a/fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl similarity index 100% rename from fdtd/kernels/update_s.cl rename to opencl_fdtd/kernels/update_s.cl diff --git a/fdtd/simulation.py b/opencl_fdtd/simulation.py similarity index 99% rename from fdtd/simulation.py rename to opencl_fdtd/simulation.py index f1a8dec..77a95df 100644 --- a/fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -152,7 +152,7 @@ class Simulation(object): S_fields[ptr('oS')] = self.oS S_fields[ptr('S')] = self.S else: - S_fields = OrderedDict() + S_fields = OrderedDict() ''' PML @@ -167,7 +167,7 @@ class Simulation(object): p0 = numpy.exp(-(sigma + alpha) * dt) p1 = sigma / (sigma + alpha) * (p0 - 1) return p0, p1 - + xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4)) xep -= 0.5 xhn -= 0.5 @@ -190,7 +190,7 @@ class Simulation(object): for ne, nh in zip(*psi_names): pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) - + self.pml_e_fields = pml_e_fields self.pml_h_fields = pml_h_fields From a85f5477490f8f175dc42e2e61a67026b4c9063a Mon Sep 17 00:00:00 2001 From: jan Date: Thu, 24 Aug 2017 11:28:03 -0700 Subject: [PATCH 04/29] doc updates --- README.md | 4 ++- opencl_fdtd/kernels/update_e.cl | 4 +-- opencl_fdtd/kernels/update_h.cl | 4 +-- opencl_fdtd/kernels/update_s.cl | 8 ++--- opencl_fdtd/simulation.py | 52 ++++++++++++++++++++++++++++++--- 5 files changed, 59 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index fbc972b..26d5acd 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,12 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs). * numpy * pyopencl * jinja2 +* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) + +Optional (used for examples): * dill (for file output) * [gridlock](https://mpxd.net/gogs/jan/gridlock) * [masque](https://mpxd.net/gogs/jan/masque) -* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) To get the code, just clone this repository: ```bash diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 2728b46..4bda993 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -1,11 +1,11 @@ /* * Update E-field, including any PMLs. - * + * * Template parameters: * common_header: Rendered contents of common.cl * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities * pml_thickness: Number of cells (integer) - * + * * OpenCL args: * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] */ diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index e093cb9..8519b51 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -1,12 +1,12 @@ /* * Update H-field, including any PMLs. * Also precalculate values for poynting vector if necessary. - * + * * Template parameters: * common_header: Rendered contents of common.cl * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities * pml_thickness: Number of cells (integer) - * do_poynting: Whether to precalculate poynting vector components (boolean) + * do_poynting: Whether to precalculate poynting vector components (boolean) * * OpenCL args: * E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS] diff --git a/opencl_fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl index 0310411..94e061e 100644 --- a/opencl_fdtd/kernels/update_s.cl +++ b/opencl_fdtd/kernels/update_s.cl @@ -1,11 +1,11 @@ /* * Update E-field, including any PMLs. - * + * * Template parameters: * common_header: Rendered contents of common.cl * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities * pml_thickness: Number of cells (integer) - * + * * OpenCL args: * E, H, dt, S, oS */ @@ -17,12 +17,12 @@ /* * Calculate S from oS (pre-calculated components) - */ + */ __global ftype *Sx = S + XX; __global ftype *Sy = S + YY; __global ftype *Sz = S + ZZ; -// Use unscaled S components from H locations +// Use unscaled S components from H locations __global ftype *oSxy = oS + 0 * field_size; __global ftype *oSyz = oS + 1 * field_size; __global ftype *oSzx = oS + 2 * field_size; diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 77a95df..7a49f39 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -25,6 +25,35 @@ jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) class Simulation(object): """ Constructs and holds the basic FDTD operations and related fields + + After constructing this object, call the (update_E, update_H, update_S) members + to perform FDTD updates on the stored (E, H, S) fields: + + sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) + with open('sources.c', 'w') as f: + f.write('{}'.format(sim.sources)) + + for t in range(max_t): + sim.update_E([]).wait() + + # Find the linear index for the center point, for Ey + ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + \ + numpy.prod(grid.shape) * 1 + # Perturb the field (i.e., add a soft current source) + sim.E[ind] += numpy.sin(omega * t * sim.dt) + event = sim.update_H([]) + if sim.update_S: + event = sim.update_S([event]) + event.wait() + + with lzma.open('saved_simulation', 'wb') as f: + dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f) + + Code in the form + event2 = sim.update_H([event0, event1]) + indicates that the update_H operation should be prepared immediately, but wait for + event0 and event1 to occur (i.e. previous operations to finish) before starting execution. + event2 can then be used to prepare further operations to be run after update_H. """ E = None # type: List[pyopencl.array.Array] H = None # type: List[pyopencl.array.Array] @@ -37,9 +66,9 @@ class Simulation(object): context = None # type: pyopencl.Context queue = None # type: pyopencl.CommandQueue - update_E = None # type: Callable[[],pyopencl.Event] - update_H = None # type: Callable[[],pyopencl.Event] - update_S = None # type: Callable[[],pyopencl.Event] + update_E = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + update_H = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + update_S = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] sources = None # type: Dict[str, str] def __init__(self, @@ -50,8 +79,8 @@ class Simulation(object): context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, float_type: numpy.float32 or numpy.float64 = numpy.float32, - pml_thickness: int = 10, pmls: List[List[str]] = None, + pml_thickness: int = 10, do_poynting: bool = True): """ Initialize the simulation. @@ -64,6 +93,21 @@ class Simulation(object): :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. + :param pmls: List of [axis, direction] pairs which specify simluation boundaries to be + 'coated' with a PML (absorbing layer). Axis should be one of 'x', 'y', 'z', and + direction should be one of 'n', 'p' (i.e., negative, positive). + Default is to apply PMLs to all six boundaries. + :param pml_thickness: Thickness of any PMLs, in number of grid cells. Default 10. + :param do_poynting: If true, enables calculation of the poynting vector, S. + Poynting vector calculation adds the following computational burdens: + * During update_H, ~6 extra additions/cell are performed in order to spatially + average E and temporally average H. These quantities are multiplied + (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly). + * update_S performs a discrete cross product using the precalculated products + from update_H. This is not nice to the cache and similar to e.g. update_E + in complexity. + * GPU memory requirements are approximately doubled, since S and the intermediate + products must be stored. """ if len(epsilon) != 3: From d02cd18403e8c07b8c3de7a10b335fec180a148a Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 12:34:11 -0700 Subject: [PATCH 05/29] improve pml specification --- fdtd.py | 4 +- opencl_fdtd/kernels/update_e.cl | 17 +++---- opencl_fdtd/kernels/update_h.cl | 20 ++++---- opencl_fdtd/simulation.py | 82 +++++++++++++++++++-------------- 4 files changed, 69 insertions(+), 54 deletions(-) diff --git a/fdtd.py b/fdtd.py index 5d0f4c3..08606c6 100644 --- a/fdtd.py +++ b/fdtd.py @@ -126,7 +126,9 @@ def main(): logger.info('grid shape: {}'.format(grid.shape)) # #### Create the simulation grid #### - sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) + pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness} + for a in 'xyz' for p in 'np'] + sim = Simulation(grid.grids, do_poynting=True, pmls=pmls) # Source parameters and function w = 2 * numpy.pi * dx / wl diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 4bda993..f7304e7 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -3,8 +3,8 @@ * * Template parameters: * common_header: Rendered contents of common.cl - * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities - * pml_thickness: Number of cells (integer) + * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing + axes, polarities, and thicknesses. * * OpenCL args: * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] @@ -18,9 +18,6 @@ __global ftype *epsx = eps + XX; __global ftype *epsy = eps + YY; __global ftype *epsz = eps + ZZ; -{% if pmls -%} -const int pml_thickness = {{pml_thickness}}; -{%- endif %} /* * Precalclate derivatives @@ -42,7 +39,9 @@ ftype pExi = 0; ftype pEyi = 0; ftype pEzi = 0; -{% for r, p in pmls -%} +{% for pml in pmls -%} + {%- set r = pml['axis'] -%} + {%- set p = pml['polarity'] -%} {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%} {%- if r != 'y' -%} @@ -51,14 +50,16 @@ ftype pEzi = 0; {%- set se, sh = '+', '-' -%} {%- endif -%} +pml_{{r ~ p}}_thickness = {{pml['thickness']}}; + {%- if p == 'n' %} -if ( {{r}} < pml_thickness ) { +if ( {{r}} < pml_{{r ~ p}_thickness ) { const size_t ir = {{r}}; // index into pml parameters {%- elif p == 'p' %} -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}_thickness ) { const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters {%- endif %} diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 8519b51..7c648b6 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -4,22 +4,18 @@ * * Template parameters: * common_header: Rendered contents of common.cl - * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities - * pml_thickness: Number of cells (integer) + * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing + axes, polarities, and thicknesses. * do_poynting: Whether to precalculate poynting vector components (boolean) * * OpenCL args: - * E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS] + * E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] */ {{common_header}} //////////////////////////////////////////////////////////////////////////// -{% if pmls -%} -const int pml_thickness = {{pml_thickness}}; -{%- endif %} - /* * Precalculate derivatives */ @@ -57,7 +53,9 @@ ftype pHxi = 0; ftype pHyi = 0; ftype pHzi = 0; -{%- for r, p in pmls -%} +{% for pml in pmls -%} + {%- set r = pml['axis'] -%} + {%- set p = pml['polarity'] -%} {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%} {%- if r != 'y' -%} @@ -66,14 +64,16 @@ ftype pHzi = 0; {%- set se, sh = '+', '-' -%} {%- endif -%} +pml_{{r ~ p}}_thickness = {{pml['thickness']}}; + {%- if p == 'n' %} -if ( {{r}} < pml_thickness ) { +if ( {{r}} < pml_{{r ~ p}}_thickness ) { const size_t ir = {{r}}; // index into pml parameters {%- elif p == 'p' %} -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters {%- endif %} diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 7a49f39..78460ec 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -29,7 +29,8 @@ class Simulation(object): After constructing this object, call the (update_E, update_H, update_S) members to perform FDTD updates on the stored (E, H, S) fields: - sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) + pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np'] + sim = Simulation(grid.grids, do_poynting=True, pmls=pmls) with open('sources.c', 'w') as f: f.write('{}'.format(sim.sources)) @@ -73,31 +74,34 @@ class Simulation(object): def __init__(self, epsilon: List[numpy.ndarray], + pmls: List[Dict[str, int or float]], dt: float = .99/numpy.sqrt(3), initial_E: List[numpy.ndarray] = None, initial_H: List[numpy.ndarray] = None, context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, float_type: numpy.float32 or numpy.float64 = numpy.float32, - pmls: List[List[str]] = None, - pml_thickness: int = 10, do_poynting: bool = True): """ Initialize the simulation. :param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray spanning the simulation domain. Relative epsilon is used. - :param dt: Time step. Default is the Courant factor. + :param pmls: List of dicts with keys: + 'axis': One of 'x', 'y', 'z'. + 'direction': One of 'n', 'p'. + 'thickness': Number of layers, default 8. + 'epsilon_eff': Effective epsilon to match to. Default 1.0. + 'mu_eff': Effective mu to match to. Default 1.0. + 'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6. + 'm': Polynomial grading exponent. Default 3.5. + 'ma': Exponent for alpha. Default 1. + :param dt: Time step. Default is .99/sqrt(3). :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon. :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon. :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. - :param pmls: List of [axis, direction] pairs which specify simluation boundaries to be - 'coated' with a PML (absorbing layer). Axis should be one of 'x', 'y', 'z', and - direction should be one of 'n', 'p' (i.e., negative, positive). - Default is to apply PMLs to all six boundaries. - :param pml_thickness: Thickness of any PMLs, in number of grid cells. Default 10. :param do_poynting: If true, enables calculation of the poynting vector, S. Poynting vector calculation adds the following computational burdens: * During update_H, ~6 extra additions/cell are performed in order to spatially @@ -154,8 +158,13 @@ class Simulation(object): Exception('Initial_H list elements must have same shape as epsilon elements') self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type)) - if pmls is None: - pmls = [[d, p] for d in 'xyz' for p in 'np'] + for pml in pmls: + pml.setdefault('thickness', 8) + pml.setdefault('epsilon_eff', 1.0) + pml.setdefault('mu_eff', 1.0) + pml.setdefault('ln_R_per_layer', -1.6) + pml.setdefault('m', 3.5) + pml.setdefault('ma', 1) ctype = type_to_C(self.arg_type) @@ -176,7 +185,6 @@ class Simulation(object): ) jinja_args = { 'common_header': common_source, - 'pml_thickness': pml_thickness, 'pmls': pmls, 'do_poynting': do_poynting, } @@ -201,35 +209,39 @@ class Simulation(object): ''' PML ''' - m = (3.5, 1) - sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0) - alpha_max = 0 # TODO: Decide what to do about non-zero alpha - - def par(x): - sigma = ((x / pml_thickness) ** m[0]) * sigma_max - alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max - p0 = numpy.exp(-(sigma + alpha) * dt) - p1 = sigma / (sigma + alpha) * (p0 - 1) - return p0, p1 - - xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4)) - xep -= 0.5 - xhn -= 0.5 - - pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh'] pml_e_fields = OrderedDict() pml_h_fields = OrderedDict() - for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)): - pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe) - pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph) - for pml in pmls: - uv = 'xyz'.replace(pml[0], '') - psi_base = 'Psi_' + ''.join(pml) + '_' + a = 'xyz'.find(pml['axis']) + + sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) / \ + numpy.sqrt(pml['epsilon_eff'] * pml['mu_eff']) + alpha_max = 0 # TODO: Nonzero alpha + + def par(x): + sigma = ((x / pml['thickness']) ** pml['m']) * sigma_max + alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max + p0 = numpy.exp(-(sigma + alpha) * dt) + p1 = sigma / (sigma + alpha) * (p0 - 1) + return p0, p1 + + xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=float_type)[::-1] for _ in range(2)) + if pml['polarity'] == 'p': + xe -= 0.5 + elif pml['polarity'] == 'n': + xh -= 0.5 + + pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '01'] for eh in 'eh'] + for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)): + pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe) + pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph) + + uv = 'xyz'.replace(pml['axis'], '') + psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_' psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH'] psi_shape = list(epsilon[0].shape) - psi_shape['xyz'.find(pml[0])] = pml_thickness + psi_shape[a] = pml['thickness'] for ne, nh in zip(*psi_names): pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) From 6bd756c3d3ab8025ae3f317cc9e8e243987bf7a4 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 12:37:56 -0700 Subject: [PATCH 06/29] add setup.py --- setup.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a2f9f9d --- /dev/null +++ b/setup.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +from setuptools import setup, find_packages + +setup(name='opencl_fdtd', + version='0.4', + description='OpenCL FDTD solver', + author='Jan Petykiewicz', + author_email='anewusername@gmail.com', + url='https://mpxd.net/gogs/jan/opencl_fdtd', + packages=find_packages(), + package_data={ + 'opencl_fdfd': ['kernels/*'] + }, + install_requires=[ + 'numpy', + 'pyopencl', + 'jinja2', + 'fdfd_tools>=0.3', + ], + extras_require={ + }, + ) + From 9772f47a3482eb7a57872c0219d85753db8822ef Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 14:01:22 -0700 Subject: [PATCH 07/29] fix typo --- opencl_fdtd/kernels/update_e.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index f7304e7..9162b21 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -54,12 +54,12 @@ pml_{{r ~ p}}_thickness = {{pml['thickness']}}; {%- if p == 'n' %} -if ( {{r}} < pml_{{r ~ p}_thickness ) { +if ( {{r}} < pml_{{r ~ p}}_thickness ) { const size_t ir = {{r}}; // index into pml parameters {%- elif p == 'p' %} -if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}_thickness ) { +if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters {%- endif %} From 6a03977a699a1cf5035e48f10d9b02496b0978f5 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 13:11:19 -0700 Subject: [PATCH 08/29] fix pml param names --- opencl_fdtd/kernels/update_e.cl | 4 ++-- opencl_fdtd/kernels/update_h.cl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 9162b21..63a0c6f 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -64,8 +64,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { {%- endif %} const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi - {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}}; - {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}}; + {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; + {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}}; pE{{u}}i {{se}}= {{psi ~ u}}[ip]; pE{{v}}i {{sh}}= {{psi ~ v}}[ip]; } diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 7c648b6..cdda3a9 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -78,8 +78,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { {%- endif %} const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi - {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}}; - {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}}; + {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}}; + {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}}; pH{{u}}i {{sh}}= {{psi ~ u}}[ip]; pH{{v}}i {{se}}= {{psi ~ v}}[ip]; } From a67009d1c889208f1d6ec5fe2d91788d24a898b5 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 1 Oct 2017 13:14:27 -0700 Subject: [PATCH 09/29] fix declaration --- opencl_fdtd/kernels/update_e.cl | 2 +- opencl_fdtd/kernels/update_h.cl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 63a0c6f..eed4ee5 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -50,7 +50,7 @@ ftype pEzi = 0; {%- set se, sh = '+', '-' -%} {%- endif -%} -pml_{{r ~ p}}_thickness = {{pml['thickness']}}; +int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; {%- if p == 'n' %} diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index cdda3a9..27d7a47 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -64,7 +64,7 @@ ftype pHzi = 0; {%- set se, sh = '+', '-' -%} {%- endif -%} -pml_{{r ~ p}}_thickness = {{pml['thickness']}}; +int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; {%- if p == 'n' %} From aab57412a5b1ab39a4f2426c697612f20f1f3221 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Mon, 15 Jan 2018 22:36:40 -0800 Subject: [PATCH 10/29] move code to new location --- README.md | 8 ++++---- requirements.txt | 8 ++++---- setup.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 26d5acd..731450b 100644 --- a/README.md +++ b/README.md @@ -27,16 +27,16 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs). * numpy * pyopencl * jinja2 -* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) +* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools) Optional (used for examples): * dill (for file output) -* [gridlock](https://mpxd.net/gogs/jan/gridlock) -* [masque](https://mpxd.net/gogs/jan/masque) +* [gridlock](https://mpxd.net/code/jan/gridlock) +* [masque](https://mpxd.net/code/jan/masque) To get the code, just clone this repository: ```bash -git clone https://mpxd.net/gogs/jan/opencl_fdtd.git +git clone https://mpxd.net/code/jan/opencl_fdtd.git ``` You can install the requirements and their dependencies easily with diff --git a/requirements.txt b/requirements.txt index 6ed651b..6626171 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy h5py pyopencl jinja2 --e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster --e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock --e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque --e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools +-e git+https://mpxd.net/code/jan/float_raster.git@release#egg=float_raster +-e git+https://mpxd.net/code/jan/gridlock.git@release#egg=gridlock +-e git+https://mpxd.net/code/jan/masque.git@release#egg=masque +-e git+https://mpxd.net/code/jan/fdfd_tools.git@master#egg=fdfd_tools diff --git a/setup.py b/setup.py index a2f9f9d..55f5af9 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup(name='opencl_fdtd', description='OpenCL FDTD solver', author='Jan Petykiewicz', author_email='anewusername@gmail.com', - url='https://mpxd.net/gogs/jan/opencl_fdtd', + url='https://mpxd.net/code/jan/opencl_fdtd', packages=find_packages(), package_data={ 'opencl_fdfd': ['kernels/*'] From c8298d916fbec02925030d977288ab411d6d49c9 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 16 Sep 2018 20:12:26 -0700 Subject: [PATCH 11/29] Use python3 for setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 55f5af9..c7e55f3 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from setuptools import setup, find_packages From d0b303523e735193c021a7b7de64841dbcc47bc3 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 16 Sep 2018 20:12:41 -0700 Subject: [PATCH 12/29] Move version string into module --- opencl_fdtd/__init__.py | 4 ++++ setup.py | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/opencl_fdtd/__init__.py b/opencl_fdtd/__init__.py index e3e95b4..b621c19 100644 --- a/opencl_fdtd/__init__.py +++ b/opencl_fdtd/__init__.py @@ -1 +1,5 @@ from .simulation import Simulation, type_to_C + +__author__ = 'Jan Petykiewicz' + +version = '0.4' diff --git a/setup.py b/setup.py index c7e55f3..8b098db 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,10 @@ #!/usr/bin/env python3 from setuptools import setup, find_packages +import opencl_fdtd setup(name='opencl_fdtd', - version='0.4', + version=opencl_fdtd.version, description='OpenCL FDTD solver', author='Jan Petykiewicz', author_email='anewusername@gmail.com', From 1de6fb0e39f39ad2b047116668ab44de0a5429e3 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Sun, 16 Sep 2018 20:12:52 -0700 Subject: [PATCH 13/29] Use readme as long_description --- setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/setup.py b/setup.py index 8b098db..b7fd3cf 100644 --- a/setup.py +++ b/setup.py @@ -3,9 +3,14 @@ from setuptools import setup, find_packages import opencl_fdtd +with open('README.md', 'r') as f: + long_description = f.read() + setup(name='opencl_fdtd', version=opencl_fdtd.version, description='OpenCL FDTD solver', + long_description=long_description, + long_description_content_type='text/markdown', author='Jan Petykiewicz', author_email='anewusername@gmail.com', url='https://mpxd.net/code/jan/opencl_fdtd', From ea5e298023e74eb3c1865658cfdaedb1f06d9c58 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 01:35:20 -0800 Subject: [PATCH 14/29] Explicitly cast to int --- opencl_fdtd/kernels/common.cl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opencl_fdtd/kernels/common.cl b/opencl_fdtd/kernels/common.cl index d203ca7..deecec9 100644 --- a/opencl_fdtd/kernels/common.cl +++ b/opencl_fdtd/kernels/common.cl @@ -73,9 +73,9 @@ __global ftype *Hz = H + ZZ; * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . */ {% for r in 'xyz' %} -int m{{r}} = -di{{r}}; -int p{{r}} = +di{{r}}; -int wrap_{{r}} = (s{{r}} - 1) * di{{r}}; +int m{{r}} = - (int) di{{r}}; +int p{{r}} = + (int) di{{r}}; +int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}}; if ( {{r}} == 0 ) { m{{r}} = wrap_{{r}}; } else if ( {{r}} == s{{r}} - 1 ) { From 1e874cb0c029cae2beeb4a0fae5d60de53106a83 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 01:36:13 -0800 Subject: [PATCH 15/29] Fix sign on H component of PML --- opencl_fdtd/kernels/update_h.cl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 27d7a47..968c2b6 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -96,9 +96,9 @@ ftype Hz_old = Hz[i]; {%- endif %} // H update equations -Hx[i] -= dt * (dEzy - dEyz + pHxi); -Hy[i] -= dt * (dExz - dEzx + pHyi); -Hz[i] -= dt * (dEyx - dExy + pHzi); +Hx[i] -= dt * (dEzy - dEyz - pHxi); +Hy[i] -= dt * (dExz - dEzx - pHyi); +Hz[i] -= dt * (dEyx - dExy - pHzi); {% if do_poynting -%} // Average H across timesteps From 2b1d906665c6394561b613ba62af832bbfb72ea3 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 01:38:28 -0800 Subject: [PATCH 16/29] Add _create_field() and _create_eps() --- opencl_fdtd/simulation.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 78460ec..525d818 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -273,7 +273,24 @@ class Simulation(object): arguments=', '.join(S_args.keys())) self.update_S = lambda e: S_update(*S_args.values(), wait_for=e) + def _create_eps(self, epsilon: List[numpy.ndarray]): + if len(epsilon) != 3: + raise Exception('Epsilon must be a list with length of 3') + if not all((e.shape == epsilon[0].shape for e in epsilon[1:])): + raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) + if not epsilon[0].shape == self.shape: + raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape)) + self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type)) + def _create_field(self, initial_value: List[numpy.ndarray] = None): + if initial_value is None: + return pyopencl.array.zeros_like(self.eps) + else: + if len(initial_value) != 3: + Exception('Initial field value must be a list of length 3') + if not all((f.shape == self.shape for f in initial_value)): + Exception('Initial field list elements must have same shape as epsilon elements') + return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type)) def type_to_C(float_type) -> str: """ From f00c8b4a3eb921fc8ed118f8b7f240178a5f3c47 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 01:59:05 -0800 Subject: [PATCH 17/29] Add _create_context(), _create_operation(), and _create_pmls(), and generalize initial field value args --- opencl_fdtd/simulation.py | 116 ++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 66 deletions(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 525d818..7b94c64 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -76,8 +76,7 @@ class Simulation(object): epsilon: List[numpy.ndarray], pmls: List[Dict[str, int or float]], dt: float = .99/numpy.sqrt(3), - initial_E: List[numpy.ndarray] = None, - initial_H: List[numpy.ndarray] = None, + initial_fields: Dict[str, List[numpy.ndarray]] = None, context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, float_type: numpy.float32 or numpy.float64 = numpy.float32, @@ -113,21 +112,14 @@ class Simulation(object): * GPU memory requirements are approximately doubled, since S and the intermediate products must be stored. """ + if initial_fields is None: + initial_fields = {} - if len(epsilon) != 3: - Exception('Epsilon must be a list with length of 3') - if not all((e.shape == epsilon[0].shape for e in epsilon[1:])): - Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon]) - - if context is None: - self.context = pyopencl.create_some_context() - else: - self.context = context - - if queue is None: - self.queue = pyopencl.CommandQueue(self.context) - else: - self.queue = queue + self.shape = epsilon[0].shape + self.arg_type = float_type + self.sources = {} + self._create_context(context, queue) + self._create_eps(epsilon) if dt > .99/numpy.sqrt(3): warnings.warn('Warning: unstable dt: {}'.format(dt)) @@ -136,27 +128,8 @@ class Simulation(object): else: self.dt = dt - self.arg_type = float_type - self.sources = {} - self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type)) - - if initial_E is None: - self.E = pyopencl.array.zeros_like(self.eps) - else: - if len(initial_E) != 3: - Exception('Initial_E must be a list of length 3') - if not all((E.shape == epsilon[0].shape for E in initial_E)): - Exception('Initial_E list elements must have same shape as epsilon elements') - self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type)) - - if initial_H is None: - self.H = pyopencl.array.zeros_like(self.eps) - else: - if len(initial_H) != 3: - Exception('Initial_H must be a list of length 3') - if not all((H.shape == epsilon[0].shape for H in initial_H)): - Exception('Initial_H list elements must have same shape as epsilon elements') - self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type)) + self.E = self._create_field(initial_fields.get('E', None)) + self.H = self._create_field(initial_fields.get('H', None)) for pml in pmls: pml.setdefault('thickness', 8) @@ -181,7 +154,7 @@ class Simulation(object): common_source = jinja_env.get_template('common.cl').render( ftype=ctype, - shape=epsilon[0].shape, + shape=self.shape, ) jinja_args = { 'common_header': common_source, @@ -194,21 +167,37 @@ class Simulation(object): self.sources['E'] = E_source self.sources['H'] = H_source + + + S_fields = OrderedDict() if do_poynting: S_source = jinja_env.get_template('update_s.cl').render(**jinja_args) self.sources['S'] = S_source - self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type) + self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type) self.S = pyopencl.array.zeros_like(self.E) - S_fields = OrderedDict() S_fields[ptr('oS')] = self.oS S_fields[ptr('S')] = self.S - else: - S_fields = OrderedDict() ''' PML ''' + pml_e_fields, pml_h_fields = self._create_pmls(pmls) + + ''' + Create operations + ''' + self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields)) + self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields)) + if do_poynting: + self.update_S = self._create_operation(S_source, (base_fields, S_fields)) + + + def _create_pmls(self, pmls): + ctype = type_to_C(self.arg_type) + def ptr(arg: str) -> str: + return ctype + ' *' + arg + pml_e_fields = OrderedDict() pml_h_fields = OrderedDict() for pml in pmls: @@ -225,7 +214,7 @@ class Simulation(object): p1 = sigma / (sigma + alpha) * (p0 - 1) return p0, p1 - xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=float_type)[::-1] for _ in range(2)) + xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2)) if pml['polarity'] == 'p': xe -= 0.5 elif pml['polarity'] == 'n': @@ -240,39 +229,34 @@ class Simulation(object): psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_' psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH'] - psi_shape = list(epsilon[0].shape) + psi_shape = list(self.shape) psi_shape[a] = pml['thickness'] for ne, nh in zip(*psi_names): pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type) + return pml_e_fields, pml_h_fields - self.pml_e_fields = pml_e_fields - self.pml_h_fields = pml_h_fields + def _create_operation(self, source, args_fields): + args = OrderedDict() + [args.update(d) for d in args_fields] + update = ElementwiseKernel(self.context, operation=source, + arguments=', '.join(args.keys())) + return lambda e: update(*args.values(), wait_for=e) - ''' - Create operations - ''' - E_args = OrderedDict() - [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)] - E_update = ElementwiseKernel(self.context, operation=E_source, - arguments=', '.join(E_args.keys())) + def _create_context(self, context: pyopencl.Context = None, + queue: pyopencl.CommandQueue = None): + if context is None: + self.context = pyopencl.create_some_context() + else: + self.context = context - H_args = OrderedDict() - [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)] - H_update = ElementwiseKernel(self.context, operation=H_source, - arguments=', '.join(H_args.keys())) - self.update_E = lambda e: E_update(*E_args.values(), wait_for=e) - self.update_H = lambda e: H_update(*H_args.values(), wait_for=e) + if queue is None: + self.queue = pyopencl.CommandQueue(self.context) + else: + self.queue = queue - if do_poynting: - S_args = OrderedDict() - [S_args.update(d) for d in (base_fields, S_fields)] - S_update = ElementwiseKernel(self.context, operation=S_source, - arguments=', '.join(S_args.keys())) - - self.update_S = lambda e: S_update(*S_args.values(), wait_for=e) def _create_eps(self, epsilon: List[numpy.ndarray]): if len(epsilon) != 3: raise Exception('Epsilon must be a list with length of 3') From cb471df1827ac726e3a36d72a9868177897b90ba Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 02:00:30 -0800 Subject: [PATCH 18/29] Implement proper kappa for PML --- opencl_fdtd/kernels/update_e.cl | 2 ++ opencl_fdtd/kernels/update_h.cl | 4 ++++ opencl_fdtd/simulation.py | 19 +++++++++++-------- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index eed4ee5..5a9ed1a 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -64,6 +64,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { {%- endif %} const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + dH{{v ~ r}} *= p{{r}}2e{{p}}[ir]; + dH{{u ~ r}} *= p{{r}}2e{{p}}[ir]; {{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; {{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}}; pE{{u}}i {{se}}= {{psi ~ u}}[ip]; diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 968c2b6..56d9bbc 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -45,6 +45,8 @@ ftype aEzy = Ez[i + py] + Ez[i]; {%- endif %} + + /* * PML Update */ @@ -78,6 +80,8 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { {%- endif %} const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + dE{{v ~ r}} *= p{{r}}2h{{p}}[ir]; + dE{{u ~ r}} *= p{{r}}2h{{p}}[ir]; {{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}}; {{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}}; pH{{u}}i {{sh}}= {{psi ~ u}}[ip]; diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 7b94c64..1b6d9a2 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -203,16 +203,19 @@ class Simulation(object): for pml in pmls: a = 'xyz'.find(pml['axis']) - sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) / \ - numpy.sqrt(pml['epsilon_eff'] * pml['mu_eff']) - alpha_max = 0 # TODO: Nonzero alpha + sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) + kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff']) + alpha_max = 0 # TODO: Nonzero alpha? def par(x): - sigma = ((x / pml['thickness']) ** pml['m']) * sigma_max + scaling = ((x / (pml['thickness'])) ** pml['m']) + sigma = scaling * sigma_max + kappa = 1 + scaling * (kappa_max - 1) alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max - p0 = numpy.exp(-(sigma + alpha) * dt) - p1 = sigma / (sigma + alpha) * (p0 - 1) - return p0, p1 + p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt) + p1 = sigma / (sigma + kappa * alpha) * (p0 - 1) + p2 = 1/kappa + return p0, p1, p2 xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2)) if pml['polarity'] == 'p': @@ -220,7 +223,7 @@ class Simulation(object): elif pml['polarity'] == 'n': xh -= 0.5 - pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '01'] for eh in 'eh'] + pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh'] for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)): pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe) pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph) From d8dc024626f872d2a9310745c96d02413b1e1982 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 02:01:28 -0800 Subject: [PATCH 19/29] Implementation of "source" fields (J) --- opencl_fdtd/kernels/update_j.cl | 32 ++++++++++++++++++++++++++++++++ opencl_fdtd/simulation.py | 24 +++++++++++++++++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 opencl_fdtd/kernels/update_j.cl diff --git a/opencl_fdtd/kernels/update_j.cl b/opencl_fdtd/kernels/update_j.cl new file mode 100644 index 0000000..4d7f64d --- /dev/null +++ b/opencl_fdtd/kernels/update_j.cl @@ -0,0 +1,32 @@ +/* + * Update E-field from J field + * + * Template parameters: + * common_header: Rendered contents of common.cl + * + * OpenCL args: + * E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax + */ + +{{common_header}} + +//////////////////////////////////////////////////////////////////////////// + +__global ftype *Jrx = Jr + XX; +__global ftype *Jry = Jr + YY; +__global ftype *Jrz = Jr + ZZ; +__global ftype *Jix = Ji + XX; +__global ftype *Jiy = Ji + YY; +__global ftype *Jiz = Ji + ZZ; + + +if (x < xmin || y < ymin || z < zmin) { + PYOPENCL_ELWISE_CONTINUE; +} +if (x >= xmax || y >= ymax || z >= zmax) { + PYOPENCL_ELWISE_CONTINUE; +} + +Ex[i] += c * Jrx[i] + s * Jix[i]; +Ey[i] += c * Jry[i] + s * Jiy[i]; +Ez[i] += c * Jrz[i] + s * Jiz[i]; diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 1b6d9a2..6e12b2e 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -70,6 +70,7 @@ class Simulation(object): update_E = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] update_H = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] update_S = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] + update_J = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event] sources = None # type: Dict[str, str] def __init__(self, @@ -80,7 +81,8 @@ class Simulation(object): context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, float_type: numpy.float32 or numpy.float64 = numpy.float32, - do_poynting: bool = True): + do_poynting: bool = True, + do_fieldsrc: bool = False): """ Initialize the simulation. @@ -179,6 +181,17 @@ class Simulation(object): S_fields[ptr('oS')] = self.oS S_fields[ptr('S')] = self.S + J_fields = OrderedDict() + if do_fieldsrc: + J_source = jinja_env.get_template('update_j.cl').render(**jinja_args) + self.sources['J'] = J_source + + self.Ji = pyopencl.array.zeros_like(self.E) + self.Jr = pyopencl.array.zeros_like(self.E) + J_fields[ptr('Jr')] = self.Jr + J_fields[ptr('Ji')] = self.Ji + + ''' PML ''' @@ -191,6 +204,15 @@ class Simulation(object): self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields)) if do_poynting: self.update_S = self._create_operation(S_source, (base_fields, S_fields)) + if do_fieldsrc: + args = OrderedDict() + [args.update(d) for d in (base_fields, J_fields)] + var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')] + print(var_args) + update = ElementwiseKernel(self.context, operation=J_source, + arguments=', '.join(list(args.keys()) + var_args)) + #print(len(args.values()),'\n\n', args.values(), args.keys()) + self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e) def _create_pmls(self, pmls): From 3ac20f62718f52d3ea710fb4fb24d135eb80faf7 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Thu, 29 Nov 2018 02:01:48 -0800 Subject: [PATCH 20/29] Add bloch boundaries (untested) --- opencl_fdtd/kernels/update_e.cl | 12 ++++++++++++ opencl_fdtd/kernels/update_h.cl | 15 +++++++++++++++ opencl_fdtd/simulation.py | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index 5a9ed1a..c578414 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -31,6 +31,18 @@ ftype dHyz = Hy[i] - Hy[i + mz]; ftype dHzx = Hz[i] - Hz[i + mx]; ftype dHzy = Hz[i] - Hz[i + my]; +{% for bloch in bloch_boundaries -%} + {%- set r = bloch['axis'] -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} +if ({{r}} == 0) { + ftype bloch_im = {{bloch['real']}}; + ftype bloch_re = {{bloch['imag']}}; + dH{{u ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{u}}[i] - G{{u}}[i + m{{u}}]); + dH{{v ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{v}}[i] - G{{v}}[i + m{{v}}]); +} +{%- endfor %} + + /* * PML Update */ diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 56d9bbc..0ef15dd 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -28,6 +28,21 @@ ftype dEyz = Ey[i + pz] - Ey[i]; ftype dEzx = Ez[i + px] - Ez[i]; ftype dEzy = Ez[i + py] - Ez[i]; + +{% for bloch in bloch_boundaries -%} + {%- set r = bloch['axis'] -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} +if ({{r}} == s{{r}} - 1) { + ftype bloch_re = {{bloch['real']}}; + ftype bloch_im = {{bloch['imag']}}; + dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]); + dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]); +} +{%- endfor %} + + + + {%- if do_poynting %} diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 6e12b2e..9b23cad 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -76,6 +76,7 @@ class Simulation(object): def __init__(self, epsilon: List[numpy.ndarray], pmls: List[Dict[str, int or float]], + bloch_boundaries: List[Dict[str, int or float]] = (), dt: float = .99/numpy.sqrt(3), initial_fields: Dict[str, List[numpy.ndarray]] = None, context: pyopencl.Context = None, @@ -132,6 +133,9 @@ class Simulation(object): self.E = self._create_field(initial_fields.get('E', None)) self.H = self._create_field(initial_fields.get('H', None)) + if bloch_boundaries: + self.F = self._create_field(initial_fields.get('F', None)) + self.G = self._create_field(initial_fields.get('G', None)) for pml in pmls: pml.setdefault('thickness', 8) @@ -154,6 +158,17 @@ class Simulation(object): eps_field = OrderedDict() eps_field[ptr('eps')] = self.eps + if bloch_boundaries: + base_fields[ptr('F')] = self.F + base_fields[ptr('G')] = self.G + + bloch_fields = OrderedDict() + bloch_fields[ptr('E')] = self.F + bloch_fields[ptr('H')] = self.G + bloch_fields[ctype + ' dt'] = self.dt + bloch_fields[ptr('F')] = self.E + bloch_fields[ptr('G')] = self.H + common_source = jinja_env.get_template('common.cl').render( ftype=ctype, shape=self.shape, @@ -162,13 +177,24 @@ class Simulation(object): 'common_header': common_source, 'pmls': pmls, 'do_poynting': do_poynting, + 'bloch': bloch_boundaries, } E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) - self.sources['E'] = E_source self.sources['H'] = H_source + if bloch_boundaries: + bloch_args = jinja_args.copy() + bloch_args['do_poynting'] = False + bloch_args['bloch'] = [{'axis': b['axis'], + 'real': b['imag'], + 'imag': b['real']} + for b in bloch_boundaries] + F_source = jinja_env.get_template('update_e.cl').render(**bloch_args) + G_source = jinja_env.get_template('update_h.cl').render(**bloch_args) + self.sources['F'] = F_source + self.sources['G'] = G_source S_fields = OrderedDict() @@ -196,6 +222,8 @@ class Simulation(object): PML ''' pml_e_fields, pml_h_fields = self._create_pmls(pmls) + if bloch_boundaries: + pml_f_fields, pml_g_fields = self._create_pmls(pmls) ''' Create operations @@ -204,6 +232,9 @@ class Simulation(object): self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields)) if do_poynting: self.update_S = self._create_operation(S_source, (base_fields, S_fields)) + if bloch_boundaries: + self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields)) + self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields)) if do_fieldsrc: args = OrderedDict() [args.update(d) for d in (base_fields, J_fields)] From 5ffe547640c78af55a7a8e802f8a503cee21e950 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:13:27 -0800 Subject: [PATCH 21/29] Typo fixes and minor comment updates --- opencl_fdtd/kernels/update_e.cl | 4 ++-- opencl_fdtd/kernels/update_h.cl | 2 +- opencl_fdtd/simulation.py | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index c578414..f44527d 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -4,7 +4,7 @@ * Template parameters: * common_header: Rendered contents of common.cl * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing - axes, polarities, and thicknesses. + * axes, polarities, and thicknesses. * * OpenCL args: * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] @@ -20,7 +20,7 @@ __global ftype *epsz = eps + ZZ; /* - * Precalclate derivatives + * Precalculate derivatives */ ftype dHxy = Hx[i] - Hx[i + my]; ftype dHxz = Hx[i] - Hx[i + mz]; diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 0ef15dd..f1c19cb 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -5,7 +5,7 @@ * Template parameters: * common_header: Rendered contents of common.cl * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing - axes, polarities, and thicknesses. + * axes, polarities, and thicknesses. * do_poynting: Whether to precalculate poynting vector components (boolean) * * OpenCL args: diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 9b23cad..0eaef5d 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -56,11 +56,11 @@ class Simulation(object): event0 and event1 to occur (i.e. previous operations to finish) before starting execution. event2 can then be used to prepare further operations to be run after update_H. """ - E = None # type: List[pyopencl.array.Array] - H = None # type: List[pyopencl.array.Array] - S = None # type: List[pyopencl.array.Array] - eps = None # type: List[pyopencl.array.Array] - dt = None # type: float + E = None # type: pyopencl.array.Array + H = None # type: pyopencl.array.Array + S = None # type: pyopencl.array.Array + eps = None # type: pyopencl.array.Array + dt = None # type: float arg_type = None # type: numpy.float32 or numpy.float64 From f5a5a0ad04176e8ebbca15c8e4a53300e5b2f5e6 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:36:11 -0800 Subject: [PATCH 22/29] Enable nonuniform grids (minimally tested) --- opencl_fdtd/kernels/update_e.cl | 28 +++++++++++++++++++++------- opencl_fdtd/kernels/update_h.cl | 28 +++++++++++++++++++++------- opencl_fdtd/simulation.py | 28 +++++++++++++++++++++++++--- 3 files changed, 67 insertions(+), 17 deletions(-) diff --git a/opencl_fdtd/kernels/update_e.cl b/opencl_fdtd/kernels/update_e.cl index f44527d..9127181 100644 --- a/opencl_fdtd/kernels/update_e.cl +++ b/opencl_fdtd/kernels/update_e.cl @@ -5,9 +5,12 @@ * common_header: Rendered contents of common.cl * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing * axes, polarities, and thicknesses. + * uniform_dx: If grid is uniform, uniform_dx should be the grid spacing. + * Otherwise, uniform_dx should be False and [inv_dh{xyz}] arrays must be supplied as + * OpenCL args. * * OpenCL args: - * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] + * E, H, dt, eps, [p{012}e{np}, Psi_{xyz}{np}_E], [inv_dh{xyz}] */ {{common_header}} @@ -19,17 +22,28 @@ __global ftype *epsy = eps + YY; __global ftype *epsz = eps + ZZ; +{%- if uniform_dx %} +ftype inv_dx = 1.0 / {{uniform_dx}}; +ftype inv_dy = 1.0 / {{uniform_dx}}; +ftype inv_dz = 1.0 / {{uniform_dx}}; +{%- else %} +ftype inv_dx = inv_dhx[x]; +ftype inv_dy = inv_dhy[y]; +ftype inv_dz = inv_dhz[z]; +{%- endif %} + + /* * Precalculate derivatives */ -ftype dHxy = Hx[i] - Hx[i + my]; -ftype dHxz = Hx[i] - Hx[i + mz]; +ftype dHxy = (Hx[i] - Hx[i + my]) * inv_dy; +ftype dHxz = (Hx[i] - Hx[i + mz]) * inv_dz; -ftype dHyx = Hy[i] - Hy[i + mx]; -ftype dHyz = Hy[i] - Hy[i + mz]; +ftype dHyx = (Hy[i] - Hy[i + mx]) * inv_dx; +ftype dHyz = (Hy[i] - Hy[i + mz]) * inv_dz; -ftype dHzx = Hz[i] - Hz[i + mx]; -ftype dHzy = Hz[i] - Hz[i + my]; +ftype dHzx = (Hz[i] - Hz[i + mx]) * inv_dx; +ftype dHzy = (Hz[i] - Hz[i + my]) * inv_dy; {% for bloch in bloch_boundaries -%} {%- set r = bloch['axis'] -%} diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index f1c19cb..ac75157 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -7,9 +7,12 @@ * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing * axes, polarities, and thicknesses. * do_poynting: Whether to precalculate poynting vector components (boolean) + * uniform_dx: If grid is uniform, uniform_dx should be the grid spacing. + * Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as + * OpenCL args. * * OpenCL args: - * E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] + * E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] */ {{common_header}} @@ -19,14 +22,25 @@ /* * Precalculate derivatives */ -ftype dExy = Ex[i + py] - Ex[i]; -ftype dExz = Ex[i + pz] - Ex[i]; +{%- if uniform_dx %} +ftype inv_dx = 1.0 / {{uniform_dx}}; +ftype inv_dy = 1.0 / {{uniform_dx}}; +ftype inv_dz = 1.0 / {{uniform_dx}}; +{%- else %} +ftype inv_dx = inv_dex[x]; +ftype inv_dy = inv_dey[y]; +ftype inv_dz = inv_dez[z]; +{%- endif %} -ftype dEyx = Ey[i + px] - Ey[i]; -ftype dEyz = Ey[i + pz] - Ey[i]; -ftype dEzx = Ez[i + px] - Ez[i]; -ftype dEzy = Ez[i + py] - Ez[i]; +ftype dExy = (Ex[i + py] - Ex[i]) * inv_dy; +ftype dExz = (Ex[i + pz] - Ex[i]) * inv_dz; + +ftype dEyx = (Ey[i + px] - Ey[i]) * inv_dx; +ftype dEyz = (Ey[i + pz] - Ey[i]) * inv_dz; + +ftype dEzx = (Ez[i + px] - Ez[i]) * inv_dx; +ftype dEzy = (Ez[i + py] - Ez[i]) * inv_dy; {% for bloch in bloch_boundaries -%} diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 0eaef5d..940725a 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -61,6 +61,7 @@ class Simulation(object): S = None # type: pyopencl.array.Array eps = None # type: pyopencl.array.Array dt = None # type: float + inv_dxes = None # type: List[pyopencl.array.Array] arg_type = None # type: numpy.float32 or numpy.float64 @@ -77,7 +78,8 @@ class Simulation(object): epsilon: List[numpy.ndarray], pmls: List[Dict[str, int or float]], bloch_boundaries: List[Dict[str, int or float]] = (), - dt: float = .99/numpy.sqrt(3), + dxes: List[List[numpy.ndarray]] or float = None, + dt: float = None, initial_fields: Dict[str, List[numpy.ndarray]] = None, context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None, @@ -98,7 +100,7 @@ class Simulation(object): 'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6. 'm': Polynomial grading exponent. Default 3.5. 'ma': Exponent for alpha. Default 1. - :param dt: Time step. Default is .99/sqrt(3). + :param dt: Time step. Default is min(dxes) * .99/sqrt(3). :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon. :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon. :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. @@ -124,7 +126,22 @@ class Simulation(object): self._create_context(context, queue) self._create_eps(epsilon) - if dt > .99/numpy.sqrt(3): + if dxes is None: + dxes = 1.0 + + if isinstance(dxes, (float, int)): + uniform_dx = dxes + min_dx = dxes + else: + uniform_dx = False + self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]] + min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1]) + + max_dt = min_dx * .99 / numpy.sqrt(3) + + if dt is None: + self.dt = max_dt + elif dt > max_dt: warnings.warn('Warning: unstable dt: {}'.format(dt)) elif dt <= 0: raise Exception('Invalid dt: {}'.format(dt)) @@ -154,6 +171,10 @@ class Simulation(object): base_fields[ptr('E')] = self.E base_fields[ptr('H')] = self.H base_fields[ctype + ' dt'] = self.dt + if uniform_dx == False: + inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz'] + for name, field in zip(inv_dx_names, self.inv_dxes): + base_fields[ptr(name)] = field eps_field = OrderedDict() eps_field[ptr('eps')] = self.eps @@ -178,6 +199,7 @@ class Simulation(object): 'pmls': pmls, 'do_poynting': do_poynting, 'bloch': bloch_boundaries, + 'uniform_dx': uniform_dx, } E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) From 65cc1183921b6107b8487fc762a1606bb6512d8a Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:36:22 -0800 Subject: [PATCH 23/29] Remove debug code --- opencl_fdtd/simulation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 940725a..087e9c4 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -261,10 +261,8 @@ class Simulation(object): args = OrderedDict() [args.update(d) for d in (base_fields, J_fields)] var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')] - print(var_args) update = ElementwiseKernel(self.context, operation=J_source, arguments=', '.join(list(args.keys()) + var_args)) - #print(len(args.values()),'\n\n', args.values(), args.keys()) self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e) From 9cf50b1d88f2745cbf8f8215e23ec0610de67b84 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:36:32 -0800 Subject: [PATCH 24/29] Enable nonzero pml alpha --- opencl_fdtd/simulation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 087e9c4..d24dcf2 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -160,6 +160,7 @@ class Simulation(object): pml.setdefault('mu_eff', 1.0) pml.setdefault('ln_R_per_layer', -1.6) pml.setdefault('m', 3.5) + pml.setdefault('cfs_alpha', 0) pml.setdefault('ma', 1) ctype = type_to_C(self.arg_type) @@ -278,7 +279,7 @@ class Simulation(object): sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1) kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff']) - alpha_max = 0 # TODO: Nonzero alpha? + alpha_max = pml['cfs_alpha'] def par(x): scaling = ((x / (pml['thickness'])) ** pml['m']) From 60b70bb332d29fb5a2aecb801517be659cf4daf9 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:37:07 -0800 Subject: [PATCH 25/29] Add restrict keyword to pointers (not sharing the same memory for multiple fields) --- opencl_fdtd/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index d24dcf2..1e573c9 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -166,7 +166,7 @@ class Simulation(object): ctype = type_to_C(self.arg_type) def ptr(arg: str) -> str: - return ctype + ' *' + arg + return ctype + ' * restrict ' + arg base_fields = OrderedDict() base_fields[ptr('E')] = self.E From a76e741d325d26ff5968904ae12f4ed95effd313 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Fri, 30 Nov 2018 00:37:16 -0800 Subject: [PATCH 26/29] Minor spacing changes --- opencl_fdtd/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 1e573c9..988e45b 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -321,7 +321,6 @@ class Simulation(object): arguments=', '.join(args.keys())) return lambda e: update(*args.values(), wait_for=e) - def _create_context(self, context: pyopencl.Context = None, queue: pyopencl.CommandQueue = None): if context is None: @@ -353,6 +352,7 @@ class Simulation(object): Exception('Initial field list elements must have same shape as epsilon elements') return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type)) + def type_to_C(float_type) -> str: """ Returns a string corresponding to the C equivalent of a numpy type. From 0f0f16e1846489ac1ab4c4126167c5b0b3fbb0fe Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 30 Nov 2018 01:03:10 -0800 Subject: [PATCH 27/29] Minor formatting changes --- opencl_fdtd/kernels/update_h.cl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index ac75157..187a18a 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -52,10 +52,7 @@ if ({{r}} == s{{r}} - 1) { dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]); dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]); } -{%- endfor %} - - - +{% endfor -%} {%- if do_poynting %} @@ -93,7 +90,7 @@ ftype pHzi = 0; {%- set se, sh = '-', '+' -%} {%- else -%} {%- set se, sh = '+', '-' -%} - {%- endif -%} + {%- endif %} int pml_{{r ~ p}}_thickness = {{pml['thickness']}}; From 385e2859a9620cefe3536bd2eea9f5a051c2e9b2 Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 30 Nov 2018 01:03:20 -0800 Subject: [PATCH 28/29] Update parameter descriptions --- opencl_fdtd/simulation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 988e45b..9dd90c0 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -100,9 +100,13 @@ class Simulation(object): 'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6. 'm': Polynomial grading exponent. Default 3.5. 'ma': Exponent for alpha. Default 1. + :param bloch_boundaries: List of dicts with keys: + 'axis': One of 'x', 'y', 'z'. + 'real': Real part of bloch phase factor (i.e. real(exp(i * phase))) + 'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase))) :param dt: Time step. Default is min(dxes) * .99/sqrt(3). - :param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon. - :param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon. + :param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the + specified fields (default is 0 everywhere). Fields have same format as epsilon. :param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called. :param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called. :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. From 582dafbc2fefce0bcc38b524a715e99f9c5ac89b Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 30 Nov 2018 01:04:00 -0800 Subject: [PATCH 29/29] Update example with bloch fields --- fdtd.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/fdtd.py b/fdtd.py index 08606c6..99af07b 100644 --- a/fdtd.py +++ b/fdtd.py @@ -128,7 +128,9 @@ def main(): # #### Create the simulation grid #### pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness} for a in 'xyz' for p in 'np'] - sim = Simulation(grid.grids, do_poynting=True, pmls=pmls) + #bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x'] + bloch = [] + sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch) # Source parameters and function w = 2 * numpy.pi * dx / wl @@ -142,22 +144,32 @@ def main(): with open('sources.c', 'w') as f: f.write(sim.sources['E']) - f.write('\n==========================================\n') + f.write('\n====================H======================\n') f.write(sim.sources['H']) if sim.update_S: - f.write('\n==========================================\n') + f.write('\n=====================S=====================\n') f.write(sim.sources['S']) + if bloch: + f.write('\n=====================F=====================\n') + f.write(sim.sources['F']) + f.write('\n=====================G=====================\n') + f.write(sim.sources['G']) # #### Run a bunch of iterations #### # event = sim.whatever([prev_event]) indicates that sim.whatever should be queued # immediately and run once prev_event is finished. start = time.perf_counter() for t in range(max_t): - sim.update_E([]).wait() + e = sim.update_E([]) + if bloch: + e = sim.update_F([e]) + e.wait() ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape) sim.E[ind] += field_source(t) e = sim.update_H([]) + if bloch: + e = sim.update_G([e]) if sim.update_S: e = sim.update_S([e]) e.wait()