Compare commits

...

33 Commits

Author SHA1 Message Date
d5fd78d493 Revert "Add restrict keyword to pointers (not sharing the same memory for multiple fields)"
This reverts commit 60b70bb332.

It appears to have minimal performance impact, and fails to compile on
nvidia cards.
2019-07-17 00:55:28 -07:00
fe2f4f4510 style fixes 2019-07-15 00:06:43 -07:00
314e36a3cc fix for 1-sized grids 2019-07-15 00:06:43 -07:00
a37df3a74f expand gitignores 2019-07-15 00:06:43 -07:00
jan
582dafbc2f Update example with bloch fields 2018-11-30 01:04:00 -08:00
jan
385e2859a9 Update parameter descriptions 2018-11-30 01:03:20 -08:00
jan
0f0f16e184 Minor formatting changes 2018-11-30 01:03:10 -08:00
a76e741d32 Minor spacing changes 2018-11-30 00:37:16 -08:00
60b70bb332 Add restrict keyword to pointers (not sharing the same memory for multiple fields) 2018-11-30 00:37:07 -08:00
9cf50b1d88 Enable nonzero pml alpha 2018-11-30 00:36:32 -08:00
65cc118392 Remove debug code 2018-11-30 00:36:22 -08:00
f5a5a0ad04 Enable nonuniform grids (minimally tested) 2018-11-30 00:36:11 -08:00
5ffe547640 Typo fixes and minor comment updates 2018-11-30 00:13:27 -08:00
3ac20f6271 Add bloch boundaries (untested) 2018-11-29 02:01:48 -08:00
d8dc024626 Implementation of "source" fields (J) 2018-11-29 02:01:28 -08:00
cb471df182 Implement proper kappa for PML 2018-11-29 02:00:30 -08:00
f00c8b4a3e Add _create_context(), _create_operation(), and _create_pmls(), and generalize initial field value args 2018-11-29 01:59:05 -08:00
2b1d906665 Add _create_field() and _create_eps() 2018-11-29 01:38:28 -08:00
1e874cb0c0 Fix sign on H component of PML 2018-11-29 01:36:13 -08:00
ea5e298023 Explicitly cast to int 2018-11-29 01:35:20 -08:00
1de6fb0e39 Use readme as long_description 2018-09-16 20:12:52 -07:00
d0b303523e Move version string into module 2018-09-16 20:12:41 -07:00
c8298d916f Use python3 for setup.py 2018-09-16 20:12:26 -07:00
aab57412a5 move code to new location 2018-01-15 22:36:40 -08:00
jan
a67009d1c8 fix declaration 2017-10-06 14:20:10 -07:00
jan
6a03977a69 fix pml param names 2017-10-06 14:12:51 -07:00
jan
9772f47a34 fix typo 2017-10-06 14:01:22 -07:00
jan
6bd756c3d3 add setup.py 2017-10-06 13:50:57 -07:00
jan
d02cd18403 improve pml specification 2017-10-06 13:49:46 -07:00
jan
c137da15b9 Merge branch 'master' of mpxd.net:jan/opencl_fdtd 2017-08-24 11:28:14 -07:00
jan
a85f547749 doc updates 2017-08-24 11:28:03 -07:00
jan
0d91f0d43e rename lib 2017-08-14 15:41:20 -07:00
97d9901e4b Use logging package for output 2017-08-12 19:20:29 -07:00
15 changed files with 659 additions and 369 deletions

8
.gitignore vendored
View File

@ -1,5 +1,9 @@
.idea/
__pycache__
*.h5
*.pyc
__pycache__
*.py[cod]
build/
dist/
*.egg-info/

View File

@ -27,14 +27,16 @@ electromagnetic simulations on parallel compute hardware (mainly GPUs).
* numpy
* pyopencl
* jinja2
* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools)
Optional (used for examples):
* dill (for file output)
* [gridlock](https://mpxd.net/gogs/jan/gridlock)
* [masque](https://mpxd.net/gogs/jan/masque)
* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
* [gridlock](https://mpxd.net/code/jan/gridlock)
* [masque](https://mpxd.net/code/jan/masque)
To get the code, just clone this repository:
```bash
git clone https://mpxd.net/gogs/jan/opencl_fdtd.git
git clone https://mpxd.net/code/jan/opencl_fdtd.git
```
You can install the requirements and their dependencies easily with

33
fdtd.py
View File

@ -6,12 +6,13 @@ See main() for simulation setup.
import sys
import time
import logging
import numpy
import lzma
import dill
from fdtd.simulation import Simulation
from opencl_fdtd import Simulation
from masque import Pattern, shapes
import gridlock
import pcgen
@ -20,6 +21,9 @@ import fdfd_tools
__author__ = 'Jan Petykiewicz'
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
"""
@ -120,9 +124,13 @@ def main():
eps=n_air**2,
polygons=mask.as_polygons())
print('grid shape: {}'.format(grid.shape))
logger.info('grid shape: {}'.format(grid.shape))
# #### Create the simulation grid ####
sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
for a in 'xyz' for p in 'np']
#bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
bloch = []
sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
# Source parameters and function
w = 2 * numpy.pi * dx / wl
@ -136,28 +144,38 @@ def main():
with open('sources.c', 'w') as f:
f.write(sim.sources['E'])
f.write('\n==========================================\n')
f.write('\n====================H======================\n')
f.write(sim.sources['H'])
if sim.update_S:
f.write('\n==========================================\n')
f.write('\n=====================S=====================\n')
f.write(sim.sources['S'])
if bloch:
f.write('\n=====================F=====================\n')
f.write(sim.sources['F'])
f.write('\n=====================G=====================\n')
f.write(sim.sources['G'])
# #### Run a bunch of iterations ####
# event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
# immediately and run once prev_event is finished.
start = time.perf_counter()
for t in range(max_t):
sim.update_E([]).wait()
e = sim.update_E([])
if bloch:
e = sim.update_F([e])
e.wait()
ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
sim.E[ind] += field_source(t)
e = sim.update_H([])
if bloch:
e = sim.update_G([e])
if sim.update_S:
e = sim.update_S([e])
e.wait()
if t % 100 == 0:
print('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
sys.stdout.flush()
with lzma.open('saved_simulation', 'wb') as f:
@ -172,5 +190,6 @@ def main():
d['S'] = unvec(sim.S.get())
dill.dump(d, f)
if __name__ == '__main__':
main()

View File

View File

@ -1,78 +0,0 @@
/*
* Update E-field, including any PMLs.
*
* Template parameters:
* common_header: Rendered contents of common.cl
* pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
* pml_thickness: Number of cells (integer)
*
* OpenCL args:
* E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
*/
{{common_header}}
////////////////////////////////////////////////////////////////////////////
__global ftype *epsx = eps + XX;
__global ftype *epsy = eps + YY;
__global ftype *epsz = eps + ZZ;
{% if pmls -%}
const int pml_thickness = {{pml_thickness}};
{%- endif %}
/*
* Precalclate derivatives
*/
ftype dHxy = Hx[i] - Hx[i + my];
ftype dHxz = Hx[i] - Hx[i + mz];
ftype dHyx = Hy[i] - Hy[i + mx];
ftype dHyz = Hy[i] - Hy[i + mz];
ftype dHzx = Hz[i] - Hz[i + mx];
ftype dHzy = Hz[i] - Hz[i + my];
/*
* PML Update
*/
// PML effects on E
ftype pExi = 0;
ftype pEyi = 0;
ftype pEzi = 0;
{% for r, p in pmls -%}
{%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
{%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
{%- if r != 'y' -%}
{%- set se, sh = '-', '+' -%}
{%- else -%}
{%- set se, sh = '+', '-' -%}
{%- endif -%}
{%- if p == 'n' %}
if ( {{r}} < pml_thickness ) {
const size_t ir = {{r}}; // index into pml parameters
{%- elif p == 'p' %}
if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters
{%- endif %}
const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi
{{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
{{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
pE{{u}}i {{se}}= {{psi ~ u}}[ip];
pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
}
{%- endfor %}
/*
* Update E
*/
Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);

View File

@ -1,238 +0,0 @@
"""
Class for constructing and holding the basic FDTD operations and fields
"""
from typing import List, Dict, Callable
from collections import OrderedDict
import numpy
import jinja2
import warnings
import pyopencl
import pyopencl.array
from pyopencl.elementwise import ElementwiseKernel
from fdfd_tools import vec
__author__ = 'Jan Petykiewicz'
# Create jinja2 env on module load
jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
class Simulation(object):
"""
Constructs and holds the basic FDTD operations and related fields
"""
E = None # type: List[pyopencl.array.Array]
H = None # type: List[pyopencl.array.Array]
S = None # type: List[pyopencl.array.Array]
eps = None # type: List[pyopencl.array.Array]
dt = None # type: float
arg_type = None # type: numpy.float32 or numpy.float64
context = None # type: pyopencl.Context
queue = None # type: pyopencl.CommandQueue
update_E = None # type: Callable[[],pyopencl.Event]
update_H = None # type: Callable[[],pyopencl.Event]
update_S = None # type: Callable[[],pyopencl.Event]
sources = None # type: Dict[str, str]
def __init__(self,
epsilon: List[numpy.ndarray],
dt: float = .99/numpy.sqrt(3),
initial_E: List[numpy.ndarray] = None,
initial_H: List[numpy.ndarray] = None,
context: pyopencl.Context = None,
queue: pyopencl.CommandQueue = None,
float_type: numpy.float32 or numpy.float64 = numpy.float32,
pml_thickness: int = 10,
pmls: List[List[str]] = None,
do_poynting: bool = True):
"""
Initialize the simulation.
:param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
spanning the simulation domain. Relative epsilon is used.
:param dt: Time step. Default is the Courant factor.
:param initial_E: Initial E-field (default is 0 everywhere). Same format as epsilon.
:param initial_H: Initial H-field (default is 0 everywhere). Same format as epsilon.
:param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
:param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
:param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
"""
if len(epsilon) != 3:
Exception('Epsilon must be a list with length of 3')
if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
if context is None:
self.context = pyopencl.create_some_context()
else:
self.context = context
if queue is None:
self.queue = pyopencl.CommandQueue(self.context)
else:
self.queue = queue
if dt > .99/numpy.sqrt(3):
warnings.warn('Warning: unstable dt: {}'.format(dt))
elif dt <= 0:
raise Exception('Invalid dt: {}'.format(dt))
else:
self.dt = dt
self.arg_type = float_type
self.sources = {}
self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type))
if initial_E is None:
self.E = pyopencl.array.zeros_like(self.eps)
else:
if len(initial_E) != 3:
Exception('Initial_E must be a list of length 3')
if not all((E.shape == epsilon[0].shape for E in initial_E)):
Exception('Initial_E list elements must have same shape as epsilon elements')
self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
if initial_H is None:
self.H = pyopencl.array.zeros_like(self.eps)
else:
if len(initial_H) != 3:
Exception('Initial_H must be a list of length 3')
if not all((H.shape == epsilon[0].shape for H in initial_H)):
Exception('Initial_H list elements must have same shape as epsilon elements')
self.H = pyopencl.array.to_device(self.queue, vec(H).astype(float_type))
if pmls is None:
pmls = [[d, p] for d in 'xyz' for p in 'np']
ctype = type_to_C(self.arg_type)
def ptr(arg: str) -> str:
return ctype + ' *' + arg
base_fields = OrderedDict()
base_fields[ptr('E')] = self.E
base_fields[ptr('H')] = self.H
base_fields[ctype + ' dt'] = self.dt
eps_field = OrderedDict()
eps_field[ptr('eps')] = self.eps
common_source = jinja_env.get_template('common.cl').render(
ftype=ctype,
shape=epsilon[0].shape,
)
jinja_args = {
'common_header': common_source,
'pml_thickness': pml_thickness,
'pmls': pmls,
'do_poynting': do_poynting,
}
E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
self.sources['E'] = E_source
self.sources['H'] = H_source
if do_poynting:
S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
self.sources['S'] = S_source
self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=float_type)
self.S = pyopencl.array.zeros_like(self.E)
S_fields = OrderedDict()
S_fields[ptr('oS')] = self.oS
S_fields[ptr('S')] = self.S
else:
S_fields = OrderedDict()
'''
PML
'''
m = (3.5, 1)
sigma_max = 0.8 * (m[0] + 1) / numpy.sqrt(1.0) # TODO: epsilon_eff (not 1.0)
alpha_max = 0 # TODO: Decide what to do about non-zero alpha
def par(x):
sigma = ((x / pml_thickness) ** m[0]) * sigma_max
alpha = ((1 - x / pml_thickness) ** m[1]) * alpha_max
p0 = numpy.exp(-(sigma + alpha) * dt)
p1 = sigma / (sigma + alpha) * (p0 - 1)
return p0, p1
xen, xep, xhn, xhp = (numpy.arange(1, pml_thickness + 1, dtype=float_type)[::-1] for _ in range(4))
xep -= 0.5
xhn -= 0.5
pml_p_names = [['p' + a + eh + np for np in 'np' for a in '01'] for eh in 'eh']
pml_e_fields = OrderedDict()
pml_h_fields = OrderedDict()
for ne, nh, pe, ph in zip(*pml_p_names, par(xen) + par(xep), par(xhn) + par(xhp)):
pml_e_fields[ptr(ne)] = pyopencl.array.to_device(self.queue, pe)
pml_h_fields[ptr(nh)] = pyopencl.array.to_device(self.queue, ph)
for pml in pmls:
uv = 'xyz'.replace(pml[0], '')
psi_base = 'Psi_' + ''.join(pml) + '_'
psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
psi_shape = list(epsilon[0].shape)
psi_shape['xyz'.find(pml[0])] = pml_thickness
for ne, nh in zip(*psi_names):
pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
self.pml_e_fields = pml_e_fields
self.pml_h_fields = pml_h_fields
'''
Create operations
'''
E_args = OrderedDict()
[E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)]
E_update = ElementwiseKernel(self.context, operation=E_source,
arguments=', '.join(E_args.keys()))
H_args = OrderedDict()
[H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)]
H_update = ElementwiseKernel(self.context, operation=H_source,
arguments=', '.join(H_args.keys()))
self.update_E = lambda e: E_update(*E_args.values(), wait_for=e)
self.update_H = lambda e: H_update(*H_args.values(), wait_for=e)
if do_poynting:
S_args = OrderedDict()
[S_args.update(d) for d in (base_fields, S_fields)]
S_update = ElementwiseKernel(self.context, operation=S_source,
arguments=', '.join(S_args.keys()))
self.update_S = lambda e: S_update(*S_args.values(), wait_for=e)
def type_to_C(float_type) -> str:
"""
Returns a string corresponding to the C equivalent of a numpy type.
Only works for float16, float32, float64.
:param float_type: e.g. numpy.float32
:return: string containing the corresponding C type (eg. 'double')
"""
if float_type == numpy.float16:
arg_type = 'half'
elif float_type == numpy.float32:
arg_type = 'float'
elif float_type == numpy.float64:
arg_type = 'double'
else:
raise Exception('Unsupported type')
return arg_type

5
opencl_fdtd/__init__.py Normal file
View File

@ -0,0 +1,5 @@
from .simulation import Simulation, type_to_C
__author__ = 'Jan Petykiewicz'
version = '0.4'

View File

@ -73,12 +73,13 @@ __global ftype *Hz = H + ZZ;
* the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
*/
{% for r in 'xyz' %}
int m{{r}} = -di{{r}};
int p{{r}} = +di{{r}};
int wrap_{{r}} = (s{{r}} - 1) * di{{r}};
int m{{r}} = - (int) di{{r}};
int p{{r}} = + (int) di{{r}};
int wrap_{{r}} = (s{{r}} - 1) * (int) di{{r}};
if ( {{r}} == 0 ) {
m{{r}} = wrap_{{r}};
} else if ( {{r}} == s{{r}} - 1 ) {
}
if ( {{r}} == s{{r}} - 1 ) {
p{{r}} = -wrap_{{r}};
}
{% endfor %}

View File

@ -0,0 +1,107 @@
/*
* Update E-field, including any PMLs.
*
* Template parameters:
* common_header: Rendered contents of common.cl
* pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
* axes, polarities, and thicknesses.
* uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
* Otherwise, uniform_dx should be False and [inv_dh{xyz}] arrays must be supplied as
* OpenCL args.
*
* OpenCL args:
* E, H, dt, eps, [p{012}e{np}, Psi_{xyz}{np}_E], [inv_dh{xyz}]
*/
{{common_header}}
////////////////////////////////////////////////////////////////////////////
__global ftype *epsx = eps + XX;
__global ftype *epsy = eps + YY;
__global ftype *epsz = eps + ZZ;
{%- if uniform_dx %}
ftype inv_dx = 1.0 / {{uniform_dx}};
ftype inv_dy = 1.0 / {{uniform_dx}};
ftype inv_dz = 1.0 / {{uniform_dx}};
{%- else %}
ftype inv_dx = inv_dhx[x];
ftype inv_dy = inv_dhy[y];
ftype inv_dz = inv_dhz[z];
{%- endif %}
/*
* Precalculate derivatives
*/
ftype dHxy = (Hx[i] - Hx[i + my]) * inv_dy;
ftype dHxz = (Hx[i] - Hx[i + mz]) * inv_dz;
ftype dHyx = (Hy[i] - Hy[i + mx]) * inv_dx;
ftype dHyz = (Hy[i] - Hy[i + mz]) * inv_dz;
ftype dHzx = (Hz[i] - Hz[i + mx]) * inv_dx;
ftype dHzy = (Hz[i] - Hz[i + my]) * inv_dy;
{% for bloch in bloch_boundaries -%}
{%- set r = bloch['axis'] -%}
{%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
if ({{r}} == 0) {
ftype bloch_im = {{bloch['real']}};
ftype bloch_re = {{bloch['imag']}};
dH{{u ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{u}}[i] - G{{u}}[i + m{{u}}]);
dH{{v ~ r}} = bloch_re * dH{{v ~ r}} + bloch_im * (G{{v}}[i] - G{{v}}[i + m{{v}}]);
}
{%- endfor %}
/*
* PML Update
*/
// PML effects on E
ftype pExi = 0;
ftype pEyi = 0;
ftype pEzi = 0;
{% for pml in pmls -%}
{%- set r = pml['axis'] -%}
{%- set p = pml['polarity'] -%}
{%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
{%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
{%- if r != 'y' -%}
{%- set se, sh = '-', '+' -%}
{%- else -%}
{%- set se, sh = '+', '-' -%}
{%- endif -%}
int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
{%- if p == 'n' %}
if ( {{r}} < pml_{{r ~ p}}_thickness ) {
const size_t ir = {{r}}; // index into pml parameters
{%- elif p == 'p' %}
if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters
{%- endif %}
const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi
dH{{v ~ r}} *= p{{r}}2e{{p}}[ir];
dH{{u ~ r}} *= p{{r}}2e{{p}}[ir];
{{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
{{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
pE{{u}}i {{se}}= {{psi ~ u}}[ip];
pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
}
{%- endfor %}
/*
* Update E
*/
Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);

View File

@ -4,33 +4,55 @@
*
* Template parameters:
* common_header: Rendered contents of common.cl
* pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
* pml_thickness: Number of cells (integer)
* pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
* axes, polarities, and thicknesses.
* do_poynting: Whether to precalculate poynting vector components (boolean)
* uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
* Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
* OpenCL args.
*
* OpenCL args:
* E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
* E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
*/
{{common_header}}
////////////////////////////////////////////////////////////////////////////
{% if pmls -%}
const int pml_thickness = {{pml_thickness}};
{%- endif %}
/*
* Precalculate derivatives
*/
ftype dExy = Ex[i + py] - Ex[i];
ftype dExz = Ex[i + pz] - Ex[i];
{%- if uniform_dx %}
ftype inv_dx = 1.0 / {{uniform_dx}};
ftype inv_dy = 1.0 / {{uniform_dx}};
ftype inv_dz = 1.0 / {{uniform_dx}};
{%- else %}
ftype inv_dx = inv_dex[x];
ftype inv_dy = inv_dey[y];
ftype inv_dz = inv_dez[z];
{%- endif %}
ftype dEyx = Ey[i + px] - Ey[i];
ftype dEyz = Ey[i + pz] - Ey[i];
ftype dEzx = Ez[i + px] - Ez[i];
ftype dEzy = Ez[i + py] - Ez[i];
ftype dExy = (Ex[i + py] - Ex[i]) * inv_dy;
ftype dExz = (Ex[i + pz] - Ex[i]) * inv_dz;
ftype dEyx = (Ey[i + px] - Ey[i]) * inv_dx;
ftype dEyz = (Ey[i + pz] - Ey[i]) * inv_dz;
ftype dEzx = (Ez[i + px] - Ez[i]) * inv_dx;
ftype dEzy = (Ez[i + py] - Ez[i]) * inv_dy;
{% for bloch in bloch_boundaries -%}
{%- set r = bloch['axis'] -%}
{%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
if ({{r}} == s{{r}} - 1) {
ftype bloch_re = {{bloch['real']}};
ftype bloch_im = {{bloch['imag']}};
dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
}
{% endfor -%}
{%- if do_poynting %}
@ -49,6 +71,8 @@ ftype aEzy = Ez[i + py] + Ez[i];
{%- endif %}
/*
* PML Update
*/
@ -57,29 +81,35 @@ ftype pHxi = 0;
ftype pHyi = 0;
ftype pHzi = 0;
{%- for r, p in pmls -%}
{% for pml in pmls -%}
{%- set r = pml['axis'] -%}
{%- set p = pml['polarity'] -%}
{%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
{%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
{%- if r != 'y' -%}
{%- set se, sh = '-', '+' -%}
{%- else -%}
{%- set se, sh = '+', '-' -%}
{%- endif -%}
{%- endif %}
int pml_{{r ~ p}}_thickness = {{pml['thickness']}};
{%- if p == 'n' %}
if ( {{r}} < pml_thickness ) {
if ( {{r}} < pml_{{r ~ p}}_thickness ) {
const size_t ir = {{r}}; // index into pml parameters
{%- elif p == 'p' %}
if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters
{%- endif %}
const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi
{{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
{{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
dE{{v ~ r}} *= p{{r}}2h{{p}}[ir];
dE{{u ~ r}} *= p{{r}}2h{{p}}[ir];
{{psi ~ u}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1h{{p}}[ir] * dE{{v ~ r}};
{{psi ~ v}}[ip] = p{{r}}0h{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1h{{p}}[ir] * dE{{u ~ r}};
pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
pH{{v}}i {{se}}= {{psi ~ v}}[ip];
}
@ -96,9 +126,9 @@ ftype Hz_old = Hz[i];
{%- endif %}
// H update equations
Hx[i] -= dt * (dEzy - dEyz + pHxi);
Hy[i] -= dt * (dExz - dEzx + pHyi);
Hz[i] -= dt * (dEyx - dExy + pHzi);
Hx[i] -= dt * (dEzy - dEyz - pHxi);
Hy[i] -= dt * (dExz - dEzx - pHyi);
Hz[i] -= dt * (dEyx - dExy - pHzi);
{% if do_poynting -%}
// Average H across timesteps

View File

@ -0,0 +1,32 @@
/*
* Update E-field from J field
*
* Template parameters:
* common_header: Rendered contents of common.cl
*
* OpenCL args:
* E, Jr, Ji, c, s, xmin, xmax, ymin, ymax, zmin, zmax
*/
{{common_header}}
////////////////////////////////////////////////////////////////////////////
__global ftype *Jrx = Jr + XX;
__global ftype *Jry = Jr + YY;
__global ftype *Jrz = Jr + ZZ;
__global ftype *Jix = Ji + XX;
__global ftype *Jiy = Ji + YY;
__global ftype *Jiz = Ji + ZZ;
if (x < xmin || y < ymin || z < zmin) {
PYOPENCL_ELWISE_CONTINUE;
}
if (x >= xmax || y >= ymax || z >= zmax) {
PYOPENCL_ELWISE_CONTINUE;
}
Ex[i] += c * Jrx[i] + s * Jix[i];
Ey[i] += c * Jry[i] + s * Jiy[i];
Ez[i] += c * Jrz[i] + s * Jiz[i];

376
opencl_fdtd/simulation.py Normal file
View File

@ -0,0 +1,376 @@
"""
Class for constructing and holding the basic FDTD operations and fields
"""
from typing import List, Dict, Callable
from collections import OrderedDict
import numpy
import jinja2
import warnings
import pyopencl
import pyopencl.array
from pyopencl.elementwise import ElementwiseKernel
from fdfd_tools import vec
__author__ = 'Jan Petykiewicz'
# Create jinja2 env on module load
jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
class Simulation(object):
"""
Constructs and holds the basic FDTD operations and related fields
After constructing this object, call the (update_E, update_H, update_S) members
to perform FDTD updates on the stored (E, H, S) fields:
pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
with open('sources.c', 'w') as f:
f.write('{}'.format(sim.sources))
for t in range(max_t):
sim.update_E([]).wait()
# Find the linear index for the center point, for Ey
ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + \
numpy.prod(grid.shape) * 1
# Perturb the field (i.e., add a soft current source)
sim.E[ind] += numpy.sin(omega * t * sim.dt)
event = sim.update_H([])
if sim.update_S:
event = sim.update_S([event])
event.wait()
with lzma.open('saved_simulation', 'wb') as f:
dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f)
Code in the form
event2 = sim.update_H([event0, event1])
indicates that the update_H operation should be prepared immediately, but wait for
event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
event2 can then be used to prepare further operations to be run after update_H.
"""
E = None # type: pyopencl.array.Array
H = None # type: pyopencl.array.Array
S = None # type: pyopencl.array.Array
eps = None # type: pyopencl.array.Array
dt = None # type: float
inv_dxes = None # type: List[pyopencl.array.Array]
arg_type = None # type: numpy.float32 or numpy.float64
context = None # type: pyopencl.Context
queue = None # type: pyopencl.CommandQueue
update_E = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
update_H = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
update_S = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
update_J = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
sources = None # type: Dict[str, str]
def __init__(self,
epsilon: List[numpy.ndarray],
pmls: List[Dict[str, int or float]],
bloch_boundaries: List[Dict[str, int or float]] = (),
dxes: List[List[numpy.ndarray]] or float = None,
dt: float = None,
initial_fields: Dict[str, List[numpy.ndarray]] = None,
context: pyopencl.Context = None,
queue: pyopencl.CommandQueue = None,
float_type: numpy.float32 or numpy.float64 = numpy.float32,
do_poynting: bool = True,
do_fieldsrc: bool = False):
"""
Initialize the simulation.
:param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
spanning the simulation domain. Relative epsilon is used.
:param pmls: List of dicts with keys:
'axis': One of 'x', 'y', 'z'.
'direction': One of 'n', 'p'.
'thickness': Number of layers, default 8.
'epsilon_eff': Effective epsilon to match to. Default 1.0.
'mu_eff': Effective mu to match to. Default 1.0.
'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
'm': Polynomial grading exponent. Default 3.5.
'ma': Exponent for alpha. Default 1.
:param bloch_boundaries: List of dicts with keys:
'axis': One of 'x', 'y', 'z'.
'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
:param dt: Time step. Default is min(dxes) * .99/sqrt(3).
:param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
specified fields (default is 0 everywhere). Fields have same format as epsilon.
:param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
:param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
:param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
:param do_poynting: If true, enables calculation of the poynting vector, S.
Poynting vector calculation adds the following computational burdens:
* During update_H, ~6 extra additions/cell are performed in order to spatially
average E and temporally average H. These quantities are multiplied
(6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
* update_S performs a discrete cross product using the precalculated products
from update_H. This is not nice to the cache and similar to e.g. update_E
in complexity.
* GPU memory requirements are approximately doubled, since S and the intermediate
products must be stored.
"""
if initial_fields is None:
initial_fields = {}
self.shape = epsilon[0].shape
self.arg_type = float_type
self.sources = {}
self._create_context(context, queue)
self._create_eps(epsilon)
if dxes is None:
dxes = 1.0
if isinstance(dxes, (float, int)):
uniform_dx = dxes
min_dx = dxes
else:
uniform_dx = False
self.inv_dxes = [self._create_field(1 / dxn) for dxn in dxes[0] + dxes[1]]
min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])
max_dt = min_dx * .99 / numpy.sqrt(3)
if dt is None:
self.dt = max_dt
elif dt > max_dt:
warnings.warn('Warning: unstable dt: {}'.format(dt))
elif dt <= 0:
raise Exception('Invalid dt: {}'.format(dt))
else:
self.dt = dt
self.E = self._create_field(initial_fields.get('E', None))
self.H = self._create_field(initial_fields.get('H', None))
if bloch_boundaries:
self.F = self._create_field(initial_fields.get('F', None))
self.G = self._create_field(initial_fields.get('G', None))
for pml in pmls:
pml.setdefault('thickness', 8)
pml.setdefault('epsilon_eff', 1.0)
pml.setdefault('mu_eff', 1.0)
pml.setdefault('ln_R_per_layer', -1.6)
pml.setdefault('m', 3.5)
pml.setdefault('cfs_alpha', 0)
pml.setdefault('ma', 1)
ctype = type_to_C(self.arg_type)
def ptr(arg: str) -> str:
return ctype + ' *' + arg
base_fields = OrderedDict()
base_fields[ptr('E')] = self.E
base_fields[ptr('H')] = self.H
base_fields[ctype + ' dt'] = self.dt
if uniform_dx == False:
inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
for name, field in zip(inv_dx_names, self.inv_dxes):
base_fields[ptr(name)] = field
eps_field = OrderedDict()
eps_field[ptr('eps')] = self.eps
if bloch_boundaries:
base_fields[ptr('F')] = self.F
base_fields[ptr('G')] = self.G
bloch_fields = OrderedDict()
bloch_fields[ptr('E')] = self.F
bloch_fields[ptr('H')] = self.G
bloch_fields[ctype + ' dt'] = self.dt
bloch_fields[ptr('F')] = self.E
bloch_fields[ptr('G')] = self.H
common_source = jinja_env.get_template('common.cl').render(
ftype=ctype,
shape=self.shape,
)
jinja_args = {
'common_header': common_source,
'pmls': pmls,
'do_poynting': do_poynting,
'bloch': bloch_boundaries,
'uniform_dx': uniform_dx,
}
E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
self.sources['E'] = E_source
self.sources['H'] = H_source
if bloch_boundaries:
bloch_args = jinja_args.copy()
bloch_args['do_poynting'] = False
bloch_args['bloch'] = [{'axis': b['axis'],
'real': b['imag'],
'imag': b['real']}
for b in bloch_boundaries]
F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
self.sources['F'] = F_source
self.sources['G'] = G_source
S_fields = OrderedDict()
if do_poynting:
S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
self.sources['S'] = S_source
self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
self.S = pyopencl.array.zeros_like(self.E)
S_fields[ptr('oS')] = self.oS
S_fields[ptr('S')] = self.S
J_fields = OrderedDict()
if do_fieldsrc:
J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
self.sources['J'] = J_source
self.Ji = pyopencl.array.zeros_like(self.E)
self.Jr = pyopencl.array.zeros_like(self.E)
J_fields[ptr('Jr')] = self.Jr
J_fields[ptr('Ji')] = self.Ji
'''
PML
'''
pml_e_fields, pml_h_fields = self._create_pmls(pmls)
if bloch_boundaries:
pml_f_fields, pml_g_fields = self._create_pmls(pmls)
'''
Create operations
'''
self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
if do_poynting:
self.update_S = self._create_operation(S_source, (base_fields, S_fields))
if bloch_boundaries:
self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))
if do_fieldsrc:
args = OrderedDict()
[args.update(d) for d in (base_fields, J_fields)]
var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
update = ElementwiseKernel(self.context, operation=J_source,
arguments=', '.join(list(args.keys()) + var_args))
self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
def _create_pmls(self, pmls):
ctype = type_to_C(self.arg_type)
def ptr(arg: str) -> str:
return ctype + ' *' + arg
pml_e_fields = OrderedDict()
pml_h_fields = OrderedDict()
for pml in pmls:
a = 'xyz'.find(pml['axis'])
sigma_max = -pml['ln_R_per_layer'] / 2 * (pml['m'] + 1)
kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
alpha_max = pml['cfs_alpha']
def par(x):
scaling = (x / pml['thickness']) ** pml['m']
sigma = scaling * sigma_max
kappa = 1 + scaling * (kappa_max - 1)
alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
p0 = numpy.exp(-(sigma / kappa + alpha) * self.dt)
p1 = sigma / (sigma + kappa * alpha) * (p0 - 1)
p2 = 1 / kappa
return p0, p1, p2
xe, xh = (numpy.arange(1, pml['thickness'] + 1, dtype=self.arg_type)[::-1] for _ in range(2))
if pml['polarity'] == 'p':
xe -= 0.5
elif pml['polarity'] == 'n':
xh -= 0.5
pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh']
for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)
uv = 'xyz'.replace(pml['axis'], '')
psi_base = 'Psi_' + pml['axis'] + pml['polarity'] + '_'
psi_names = [[psi_base + eh + c for c in uv] for eh in 'EH']
psi_shape = list(self.shape)
psi_shape[a] = pml['thickness']
for ne, nh in zip(*psi_names):
pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
return pml_e_fields, pml_h_fields
def _create_operation(self, source, args_fields):
args = OrderedDict()
[args.update(d) for d in args_fields]
update = ElementwiseKernel(self.context, operation=source,
arguments=', '.join(args.keys()))
return lambda e: update(*args.values(), wait_for=e)
def _create_context(self, context: pyopencl.Context = None,
queue: pyopencl.CommandQueue = None):
if context is None:
self.context = pyopencl.create_some_context()
else:
self.context = context
if queue is None:
self.queue = pyopencl.CommandQueue(self.context)
else:
self.queue = queue
def _create_eps(self, epsilon: List[numpy.ndarray]):
if len(epsilon) != 3:
raise Exception('Epsilon must be a list with length of 3')
if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
if not epsilon[0].shape == self.shape:
raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape))
self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
def _create_field(self, initial_value: List[numpy.ndarray] = None):
if initial_value is None:
return pyopencl.array.zeros_like(self.eps)
else:
if len(initial_value) != 3:
Exception('Initial field value must be a list of length 3')
if not all((f.shape == self.shape for f in initial_value)):
Exception('Initial field list elements must have same shape as epsilon elements')
return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
def type_to_C(float_type) -> str:
"""
Returns a string corresponding to the C equivalent of a numpy type.
Only works for float16, float32, float64.
:param float_type: e.g. numpy.float32
:return: string containing the corresponding C type (eg. 'double')
"""
if float_type == numpy.float16:
arg_type = 'half'
elif float_type == numpy.float32:
arg_type = 'float'
elif float_type == numpy.float64:
arg_type = 'double'
else:
raise Exception('Unsupported type')
return arg_type

View File

@ -2,7 +2,7 @@ numpy
h5py
pyopencl
jinja2
-e git+https://mpxd.net/gogs/jan/float_raster.git@release#egg=float_raster
-e git+https://mpxd.net/gogs/jan/gridlock.git@release#egg=gridlock
-e git+https://mpxd.net/gogs/jan/masque.git@release#egg=masque
-e git+https://mpxd.net/gogs/jan/fdfd_tools.git@master#egg=fdfd_tools
-e git+https://mpxd.net/code/jan/float_raster.git@release#egg=float_raster
-e git+https://mpxd.net/code/jan/gridlock.git@release#egg=gridlock
-e git+https://mpxd.net/code/jan/masque.git@release#egg=masque
-e git+https://mpxd.net/code/jan/fdfd_tools.git@master#egg=fdfd_tools

30
setup.py Normal file
View File

@ -0,0 +1,30 @@
#!/usr/bin/env python3
from setuptools import setup, find_packages
import opencl_fdtd
with open('README.md', 'r') as f:
long_description = f.read()
setup(name='opencl_fdtd',
version=opencl_fdtd.version,
description='OpenCL FDTD solver',
long_description=long_description,
long_description_content_type='text/markdown',
author='Jan Petykiewicz',
author_email='anewusername@gmail.com',
url='https://mpxd.net/code/jan/opencl_fdtd',
packages=find_packages(),
package_data={
'opencl_fdfd': ['kernels/*']
},
install_requires=[
'numpy',
'pyopencl',
'jinja2',
'fdfd_tools>=0.3',
],
extras_require={
},
)