Compare commits

...

35 Commits

Author SHA1 Message Date
5b16295769 update deps 2024-07-30 23:19:31 -07:00
fe684d83d3 add ruff and mypy configs 2024-07-30 23:19:21 -07:00
7f620a9bb3 modern rng approach 2024-07-30 23:19:07 -07:00
50b30d31fb fixes driven by ruff & mypy 2024-07-30 23:18:54 -07:00
b703f1ee20 drop object superclass 2024-01-10 20:57:40 -08:00
5b1e758c27 typo fix 2024-01-10 20:57:23 -08:00
d0011cb1f9 type modernization 2024-01-10 20:54:12 -08:00
654f9c165b formatting 2024-01-10 20:52:52 -08:00
689b3176cc style and type fixes 2022-11-24 16:12:40 -08:00
77c10feead gitignore caches 2022-11-22 14:31:57 -08:00
08a9328390 move to hatch-based build 2022-11-22 14:31:43 -08:00
f489eb6e06 fix template loading 2021-10-31 19:48:45 -07:00
b2043ce841 add comment about pml coords 2021-10-31 19:48:24 -07:00
c2e5c94e14 remove unused pcgen.py 2021-10-31 19:48:05 -07:00
c75a7e67c9 f-stringify 2021-10-31 19:47:48 -07:00
26ae8578c9 update to use gridlock >=1.0 2021-10-31 19:47:25 -07:00
4c81f33d05 vec/unvec live in meanas.fdmath 2021-07-11 17:35:25 -07:00
1403b9e1a3 vec comes from fdmath 2021-07-11 17:32:14 -07:00
e58efd2127 depend on meanas 2021-07-11 17:06:52 -07:00
a6384429ee use new email 2021-07-11 17:06:43 -07:00
292fde05d5 convert docstring to cleaner style 2021-07-11 17:06:32 -07:00
bbd33844b3 add return type annotation 2021-07-11 17:05:55 -07:00
2b96e5dd84 simplify print 2021-07-11 17:05:42 -07:00
922161c0e6 improve type annotation 2021-07-11 17:05:17 -07:00
da3216133a clarify that just a plain sum is used (rather than mean) 2021-07-11 17:05:02 -07:00
45e047c50e Move to VERSION.py approach for version 2021-07-11 17:04:18 -07:00
8afe1d1f26 style fixes per flake8 2020-10-16 19:32:53 -07:00
8cbb0e9864 call update_s during loop 2020-10-16 19:32:35 -07:00
4e0bd8b3c6 switch to meanas 2020-10-16 19:32:22 -07:00
a4dd031666 ongoing 2019-08-30 22:13:26 -07:00
0c7b2fd3a2 add test_simulation 2019-07-30 00:33:35 -07:00
23d65d6ebb Add poynting halves
maybe will remove this later... dunno if it's actually useful
2019-07-30 00:33:07 -07:00
cf0db63a3f Revert "Add restrict keyword to pointers (not sharing the same memory for multiple fields)"
This reverts commit 60b70bb332.

It appears to have minimal performance impact, and fails to compile on
nvidia cards.
2019-07-17 00:53:55 -07:00
89d99187b6 Remove unused code 2019-07-17 00:52:31 -07:00
1627d0da05 Consolidate poynting code into update_h 2019-07-15 00:11:09 -07:00
14 changed files with 890 additions and 551 deletions

29
.flake8 Normal file

@ -0,0 +1,29 @@
[flake8]
ignore =
# E501 line too long
E501,
# W391 newlines at EOF
W391,
# E241 multiple spaces after comma
E241,
# E302 expected 2 newlines
E302,
# W503 line break before binary operator (to be deprecated)
W503,
# E265 block comment should start with '# '
E265,
# E123 closing bracket does not match indentation of opening bracket's line
E123,
# E124 closing bracket does not match visual indentation
E124,
# E221 multiple spaces before operator
E221,
# E201 whitespace after '['
E201,
# E741 ambiguous variable name 'I'
E741,
per-file-ignores =
# F401 import without use
*/__init__.py: F401,

4
.gitignore vendored

@ -1,9 +1,11 @@
.idea/
*.h5
__pycache__
__pycache__/
*.py[cod]
build/
dist/
*.egg-info/
.pytest_cache/
.mypy_cache/

199
fdtd.py

@ -7,6 +7,7 @@ See main() for simulation setup.
import sys
import time
import logging
import pyopencl
import numpy
import lzma
@ -16,7 +17,7 @@ from opencl_fdtd import Simulation
from masque import Pattern, shapes
import gridlock
import pcgen
import fdfd_tools
import meanas
__author__ = 'Jan Petykiewicz'
@ -29,60 +30,69 @@ def perturbed_l3(a: float, radius: float, **kwargs) -> Pattern:
"""
Generate a masque.Pattern object containing a perturbed L3 cavity.
:param a: Lattice constant.
:param radius: Hole radius, in units of a (lattice constant).
:param kwargs: Keyword arguments:
hole_dose, trench_dose, hole_layer, trench_layer: Shape properties for Pattern.
Defaults *_dose=1, hole_layer=0, trench_layer=1.
shifts_a, shifts_r: passed to pcgen.l3_shift; specifies lattice constant (1 -
multiplicative factor) and radius (multiplicative factor) for shifting
holes adjacent to the defect (same row). Defaults are 0.15 shift for
first hole, 0.075 shift for third hole, and no radius change.
Args:
a: Lattice constant.
radius: Hole radius, in units of a (lattice constant).
hole_dose: Dose for all holes. Default 1.
trench_dose: Dose for undercut trenches. Default 1.
hole_layer: Layer for holes. Default (0, 0).
trench_layer: Layer for undercut trenches. Default (1, 0).
shifts_a: passed to pcgen.l3_shift
shifts_r: passed to pcgen.l3_shift
xy_size: [x, y] number of mirror periods in each direction; total size is
2 * n + 1 holes in each direction. Default [10, 10].
2 * n + 1 holes in each direction. Default (10, 10).
perturbed_radius: radius of holes perturbed to form an upwards-directed beam
(multiplicative factor). Default 1.1.
trench_width: Width of the undercut trenches. Default 1.2e3.
:return: masque.Pattern object containing the L3 design
Returns:
`masque.Pattern` object containing the L3 design
"""
default_args = {'hole_dose': 1,
'trench_dose': 1,
'hole_layer': 0,
'trench_layer': 1,
'shifts_a': (0.15, 0, 0.075),
'shifts_r': (1.0, 1.0, 1.0),
'xy_size': (10, 10),
'perturbed_radius': 1.1,
'trench_width': 1.2e3,
}
default_args = {
'hole_dose': 1,
'trench_dose': 1,
'hole_layer': 0,
'trench_layer': 1,
'shifts_a': (0.15, 0, 0.075),
'shifts_r': (1.0, 1.0, 1.0),
'xy_size': (10, 10),
'perturbed_radius': 1.1,
'trench_width': 1.2e3,
}
kwargs = {**default_args, **kwargs}
xyr = pcgen.l3_shift_perturbed_defect(mirror_dims=kwargs['xy_size'],
perturbed_radius=kwargs['perturbed_radius'],
shifts_a=kwargs['shifts_a'],
shifts_r=kwargs['shifts_r'])
xyr = pcgen.l3_shift_perturbed_defect(
mirror_dims=kwargs['xy_size'],
perturbed_radius=kwargs['perturbed_radius'],
shifts_a=kwargs['shifts_a'],
shifts_r=kwargs['shifts_r'],
)
xyr *= a
xyr[:, 2] *= radius
pat = Pattern()
pat.name = 'L3p-a{:g}r{:g}rp{:g}'.format(a, radius, kwargs['perturbed_radius'])
pat.name = f'L3p-a{a:g}r{radius:g}rp{kwargs["perturbed_radius"]:g}'
pat.shapes += [shapes.Circle(radius=r, offset=(x, y),
dose=kwargs['hole_dose'],
layer=kwargs['hole_layer'])
for x, y, r in xyr]
maxes = numpy.max(numpy.fabs(xyr), axis=0)
pat.shapes += [shapes.Polygon.rectangle(
lx=(2 * maxes[0]), ly=kwargs['trench_width'],
offset=(0, s * (maxes[1] + a + kwargs['trench_width'] / 2)),
dose=kwargs['trench_dose'], layer=kwargs['trench_layer'])
for s in (-1, 1)]
pat.shapes += [
shapes.Polygon.rectangle(
lx=(2 * maxes[0]),
ly=kwargs['trench_width'],
offset=(0, s * (maxes[1] + a + kwargs['trench_width'] / 2)),
dose=kwargs['trench_dose'],
layer=kwargs['trench_layer'],
)
for s in (-1, 1)]
return pat
def main():
max_t = 8000 # number of timesteps
max_t = 4000 # number of timesteps
dx = 25 # discretization (nm/cell)
pml_thickness = 8 # (number of cells)
@ -101,36 +111,43 @@ def main():
n_air = 1.0 # air
# Half-dimensions of the simulation grid
xy_max = (xy_size + 1) * a * [1, numpy.sqrt(3)/2]
z_max = 1.6 * a
xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
# Coordinates of the edges of the cells.
# The fdtd package can only do square grids at the moment.
half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
# xy_max = (xy_size + 1) * a * [1, numpy.sqrt(3)/2]
# z_max = 1.6 * a
# xyz_max = numpy.hstack((xy_max, z_max)) + pml_thickness * dx
#
# # Coordinates of the edges of the cells.
# # The fdtd package can only do square grids at the moment.
# half_edge_coords = [numpy.arange(dx/2, m + dx, step=dx) for m in xyz_max]
# edge_coords = [numpy.hstack((-h[::-1], h)) for h in half_edge_coords]
edge_coords = [numpy.arange(-100.5, 101), numpy.arange(-1, 1), numpy.arange(-100.5, 101)]
# edge_coords = [numpy.arange(-100.5, 101), numpy.arange(-100.5, 101), numpy.arange(-1, 1)]
# #### Create the grid, mask, and draw the device ####
grid = gridlock.Grid(edge_coords, initial=n_air**2, num_grids=3)
grid.draw_slab(surface_normal=gridlock.Direction.z,
center=[0, 0, 0],
thickness=th,
eps=n_slab**2)
mask = perturbed_l3(a, r)
grid = gridlock.Grid(edge_coords)
epsilon = grid.allocate(n_air**2)
# grid.draw_slab(epsilon,
# surface_normal=2,
# center=[0, 0, 0],
# thickness=th,
# eps=n_slab**2)
# mask = perturbed_l3(a, r)
#
# grid.draw_polygons(epsilon,
# surface_normal=2,
# center=[0, 0, 0],
# thickness=2 * th,
# eps=n_air**2,
# polygons=mask.as_polygons())
grid.draw_polygons(surface_normal=gridlock.Direction.z,
center=[0, 0, 0],
thickness=2 * th,
eps=n_air**2,
polygons=mask.as_polygons())
logger.info('grid shape: {}'.format(grid.shape))
logger.info(f'grid shape: {grid.shape}')
# #### Create the simulation grid ####
# pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
# for a in 'xyz' for p in 'np']
pmls = [{'axis': a, 'polarity': p, 'thickness': pml_thickness}
for a in 'xyz' for p in 'np']
for a in 'xz' for p in 'np']
#bloch = [{'axis': a, 'real': 1, 'imag': 0} for a in 'x']
bloch = []
sim = Simulation(grid.grids, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
sim = Simulation(epsilon, do_poynting=True, pmls=pmls, bloch_boundaries=bloch)
# Source parameters and function
w = 2 * numpy.pi * dx / wl
@ -155,36 +172,88 @@ def main():
f.write('\n=====================G=====================\n')
f.write(sim.sources['G'])
planes = numpy.empty((max_t, 4))
planes2 = numpy.empty((max_t, 4))
Ectr = numpy.empty(max_t)
u = numpy.empty(max_t)
ui = numpy.empty(max_t)
# #### Run a bunch of iterations ####
# event = sim.whatever([prev_event]) indicates that sim.whatever should be queued
# immediately and run once prev_event is finished.
start = time.perf_counter()
for t in range(max_t):
e = sim.update_E([])
if bloch:
e = sim.update_F([e])
# if bloch:
# e = sim.update_F([e])
e.wait()
ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
sim.E[ind] += field_source(t)
# sim.E[ind] += field_source(t)
if t == 2:
sim.E[ind] += 1e6
h_old = sim.H.copy()
e = sim.update_H([])
if bloch:
e = sim.update_G([e])
if sim.update_S:
e = sim.update_S([e])
# if bloch:
# e = sim.update_G([e])
e.wait()
S = sim.S.get().reshape(epsilon.shape) * sim.dt * dx * dx *dx
m = 30
planes[t] = (
S[0][+pml_thickness+2, :, pml_thickness+3:-pml_thickness-3].sum(),
S[0][-pml_thickness-2, :, pml_thickness+3:-pml_thickness-3].sum(),
S[2][pml_thickness+2:-pml_thickness-2, :, +pml_thickness+2].sum(),
S[2][pml_thickness+2:-pml_thickness-2, :, -pml_thickness-2].sum(),
)
planes2[t] = (
S[0][grid.shape[0]//2-1, 0, grid.shape[2]//2].sum(),
S[0][grid.shape[0]//2 , 0, grid.shape[2]//2].sum(),
S[2][grid.shape[0]//2 , 0, grid.shape[2]//2-1].sum(),
S[2][grid.shape[0]//2 , 0, grid.shape[2]//2].sum(),
)
# planes[t] = (
# S[0][+pml_thickness+m, pml_thickness+m+1:-pml_thickness-m, :].sum(),
# S[0][-pml_thickness-m, pml_thickness+m+1:-pml_thickness-m, :].sum(),
# S[1][pml_thickness+1+m:-pml_thickness-m, +pml_thickness+m, :].sum(),
# S[1][pml_thickness+1+m:-pml_thickness-m, -pml_thickness-m, :].sum(),
# )
# planes2[t] = (
# S[0][grid.shape[0]//2-1, grid.shape[1]//2 , 0].sum(),
# S[0][grid.shape[0]//2 , grid.shape[1]//2 , 0].sum(),
# S[1][grid.shape[0]//2 , grid.shape[1]//2-1, 0].sum(),
# S[1][grid.shape[0]//2 , grid.shape[1]//2 , 0].sum(),
# )
Ectr[t] = sim.E[ind].get()
u[t] = pyopencl.array.sum(sim.E * sim.E * sim.eps + h_old * sim.H).get() * dx * dx * dx
ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m, :,
pml_thickness+m:-pml_thickness-m].sum() * dx * dx * dx
# ui[t] = (sim.E * sim.E * sim.eps + h_old * sim.H).reshape(epsilon.shape).get()[:, pml_thickness+m:-pml_thickness-m,
# pml_thickness+m:-pml_thickness-m, :].sum() * dx * dx * dx
if t % 100 == 0:
logger.info('iteration {}: average {} iterations per sec'.format(t, (t+1)/(time.perf_counter()-start)))
avg = (t + 1) / (time.perf_counter() - start)
logger.info(f'iteration {t}: average {avg} iterations per sec')
sys.stdout.flush()
with lzma.open('saved_simulation', 'wb') as f:
def unvec(f):
return fdfd_tools.unvec(f, grid.shape)
return meanas.fdmath.unvec(f, grid.shape)
d = {
'grid': grid,
'epsilon': epsilon,
'E': unvec(sim.E.get()),
'H': unvec(sim.H.get()),
'dt': sim.dt,
'dx': dx,
'planes': planes,
'planes2': planes2,
'Ectr': Ectr,
'u': u,
'ui': ui,
}
if sim.S is not None:
d['S'] = unvec(sim.S.get())
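For reference, a minimal hedged sketch (not part of this diff) of the event-chaining convention the loop comment above refers to: each sim.update_*(events) call is queued immediately, begins once every event in the given list has completed, and returns a pyopencl.Event that can be waited on or chained into the next call. It assumes an OpenCL device is available; the small vacuum grid is a placeholder for illustration only.

import numpy
from opencl_fdtd import Simulation

epsilon = numpy.ones((3, 16, 16, 16))      # [eps_xx, eps_yy, eps_zz] on a 16^3 grid
sim = Simulation(epsilon, pmls=[], do_poynting=True)

for _t in range(10):
    e_done = sim.update_E([])              # queued with no prerequisites
    e_done.wait()                          # host-side sync before touching sim.E
    h_done = sim.update_H([e_done])        # queued now, runs once update_E has finished
    h_done.wait()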

1
opencl_fdtd/LICENSE.md Symbolic link

@ -0,0 +1 @@
../LICENSE.md

1
opencl_fdtd/README.md Symbolic link

@ -0,0 +1 @@
../README.md

opencl_fdtd/__init__.py

@ -1,5 +1,8 @@
from .simulation import Simulation, type_to_C
from .simulation import (
Simulation as Simulation,
type_to_C as type_to_C,
)
__author__ = 'Jan Petykiewicz'
version = '0.4'
__version__ = '0.4'
version = __version__

opencl_fdtd/kernels/update_e.cl

@ -92,7 +92,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi
dH{{v ~ r}} *= p{{r}}2e{{p}}[ir];
dH{{u ~ r}} *= p{{r}}2e{{p}}[ir];
{{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}};
{{psi ~ u}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ u}}[ip] + p{{r}}1e{{p}}[ir] * dH{{v ~ r}}; //note u uses v
{{psi ~ v}}[ip] = p{{r}}0e{{p}}[ir] * {{psi ~ v}}[ip] + p{{r}}1e{{p}}[ir] * dH{{u ~ r}};
pE{{u}}i {{se}}= {{psi ~ u}}[ip];
pE{{v}}i {{sh}}= {{psi ~ v}}[ip];

opencl_fdtd/kernels/update_h.cl

@ -6,13 +6,14 @@
* common_header: Rendered contents of common.cl
* pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
* axes, polarities, and thicknesses.
* do_poynting: Whether to precalculate poynting vector components (boolean)
* do_poynting: Whether to calculate poynting vector (boolean)
* do_poynting_halves: Whether to calculate half-step poynting vectors (boolean)
* uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
* Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
* OpenCL args.
*
* OpenCL args:
* E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
* E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [S], [S0, S1]
*/
{{common_header}}
@ -52,24 +53,7 @@ if ({{r}} == s{{r}} - 1) {
dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
}
{% endfor -%}
{%- if do_poynting %}
/*
* Precalculate averaged E
*/
ftype aExy = Ex[i + py] + Ex[i];
ftype aExz = Ex[i + pz] + Ex[i];
ftype aEyx = Ey[i + px] + Ey[i];
ftype aEyz = Ey[i + pz] + Ey[i];
ftype aEzx = Ez[i + px] + Ez[i];
ftype aEzy = Ez[i + py] + Ez[i];
{%- endif %}
{%- endfor %}
@ -118,7 +102,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
/*
* Update H
*/
{% if do_poynting -%}
{% if do_poynting or do_poynting_halves -%}
// Save old H for averaging
ftype Hx_old = Hx[i];
ftype Hy_old = Hy[i];
@ -131,25 +115,40 @@ Hy[i] -= dt * (dExz - dEzx - pHyi);
Hz[i] -= dt * (dEyx - dExy - pHzi);
{% if do_poynting -%}
// Average H across timesteps
ftype aHxt = Hx[i] + Hx_old;
ftype aHyt = Hy[i] + Hy_old;
ftype aHzt = Hz[i] + Hz_old;
/*
* Calculate unscaled S components at H locations
* Calculate unscaled S components at ??? locations
* //TODO: document S locations and types
*/
__global ftype *oSxy = oS + 0 * field_size;
__global ftype *oSyz = oS + 1 * field_size;
__global ftype *oSzx = oS + 2 * field_size;
__global ftype *oSxz = oS + 3 * field_size;
__global ftype *oSyx = oS + 4 * field_size;
__global ftype *oSzy = oS + 5 * field_size;
__global ftype *Sx = S + XX;
__global ftype *Sy = S + YY;
__global ftype *Sz = S + ZZ;
oSxy[i] = aEyx * aHzt;
oSxz[i] = -aEzx * aHyt;
oSyz[i] = aEzy * aHxt;
oSyx[i] = -aExy * aHzt;
oSzx[i] = aExz * aHyt;
oSzy[i] = -aEyz * aHxt;
// Sum old and new H
ftype sHxt = Hx[i] + Hx_old;
ftype sHyt = Hy[i] + Hy_old;
ftype sHzt = Hz[i] + Hz_old;
Sx[i] = Ey[i + px] * sHzt - Ez[i + px] * sHyt;
Sy[i] = Ez[i + py] * sHxt - Ex[i + py] * sHzt;
Sz[i] = Ex[i + pz] * sHyt - Ey[i + pz] * sHxt;
{%- endif -%}
{% if do_poynting_halves -%}
/*
* Calculate unscaled S components at ??? locations
* //TODO: document S locations and types
*/
__global ftype *Sx0 = S0 + XX;
__global ftype *Sy0 = S0 + YY;
__global ftype *Sz0 = S0 + ZZ;
__global ftype *Sx1 = S1 + XX;
__global ftype *Sy1 = S1 + YY;
__global ftype *Sz1 = S1 + ZZ;
Sx0[i] = Ey[i + px] * Hz_old - Ez[i + px] * Hy_old;
Sy0[i] = Ez[i + py] * Hx_old - Ex[i + py] * Hz_old;
Sz0[i] = Ex[i + pz] * Hy_old - Ey[i + pz] * Hx_old;
Sx1[i] = Ey[i + px] * Hz[i] - Ez[i + px] * Hy[i];
Sy1[i] = Ez[i + py] * Hx[i] - Ex[i + py] * Hz[i];
Sz1[i] = Ex[i + pz] * Hy[i] - Ey[i + pz] * Hx[i];
{%- endif -%}
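As a rough sketch of how a kernel template like the one above is consumed (mirroring Simulation.__init__ in simulation.py; not part of this diff): the template parameters select which blocks and kernel arguments exist, and the rendered source becomes a pyopencl ElementwiseKernel. The shape, dt value, and argument list below are illustrative placeholders, and the sketch assumes an OpenCL device plus the opencl_fdtd package layout (kernels/ shipped as package data).

import jinja2
import numpy
import pyopencl
import pyopencl.array
from pyopencl.elementwise import ElementwiseKernel

context = pyopencl.create_some_context()
queue = pyopencl.CommandQueue(context)

jinja_env = jinja2.Environment(loader=jinja2.PackageLoader('opencl_fdtd', 'kernels'))
common = jinja_env.get_template('common.cl').render(ftype='float', shape=(8, 8, 8))
h_source = jinja_env.get_template('update_h.cl').render(
    common_header=common,
    pmls=[],                   # no PML blocks, so no p*/Psi_* kernel arguments
    do_poynting=True,          # enables the S block and its 'S' kernel argument
    do_poynting_halves=False,  # S0/S1 arguments exist only when this is True
    bloch=[],
    uniform_dx=1.0,            # uniform grid, so no inv_de{xyz} arguments
)

n = 3 * 8 * 8 * 8              # three field components on an 8x8x8 grid
E = pyopencl.array.zeros(queue, n, dtype=numpy.float32)
H = pyopencl.array.zeros_like(E)
S = pyopencl.array.zeros_like(E)

# Argument order follows the header comment above: E, H, dt, [S]
update_h = ElementwiseKernel(context, operation=h_source,
                             arguments='float *E, float *H, float dt, float *S')
update_h(E, H, numpy.float32(0.5), S).wait()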

opencl_fdtd/kernels/update_s.cl

@ -1,36 +0,0 @@
/*
* Update E-field, including any PMLs.
*
* Template parameters:
* common_header: Rendered contents of common.cl
* pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
* pml_thickness: Number of cells (integer)
*
* OpenCL args:
* E, H, dt, S, oS
*/
{{common_header}}
//////////////////////////////////////////////////////////////////////
/*
* Calculate S from oS (pre-calculated components)
*/
__global ftype *Sx = S + XX;
__global ftype *Sy = S + YY;
__global ftype *Sz = S + ZZ;
// Use unscaled S components from H locations
__global ftype *oSxy = oS + 0 * field_size;
__global ftype *oSyz = oS + 1 * field_size;
__global ftype *oSzx = oS + 2 * field_size;
__global ftype *oSxz = oS + 3 * field_size;
__global ftype *oSyx = oS + 4 * field_size;
__global ftype *oSzy = oS + 5 * field_size;
ftype s_factor = dt * 0.125;
Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;

opencl_fdtd/simulation.py

@ -2,28 +2,35 @@
Class for constructing and holding the basic FDTD operations and fields
"""
from typing import List, Dict, Callable
from collections import OrderedDict
from collections.abc import Callable, Sequence
import numpy
from numpy.typing import NDArray
from numpy import floating, complexfloating
import jinja2
import warnings
import logging
import pyopencl
import pyopencl.array
from pyopencl.elementwise import ElementwiseKernel
from fdfd_tools import vec
from meanas.fdmath import vec
__author__ = 'Jan Petykiewicz'
logger = logging.getLogger(__name__)
# Create jinja2 env on module load
jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__.split('.')[0], 'kernels'))
class Simulation(object):
"""
class FDTDError(Exception):
""" Custom exception for opencl_fdtd """
pass
class Simulation:
r"""
Constructs and holds the basic FDTD operations and related fields
After constructing this object, call the (update_E, update_H, update_S) members
@ -32,7 +39,7 @@ class Simulation(object):
pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
sim = Simulation(grid.grids, do_poynting=True, pmls=pmls)
with open('sources.c', 'w') as f:
f.write('{}'.format(sim.sources))
f.write(f'{sim.sources}')
for t in range(max_t):
sim.update_E([]).wait()
@ -48,7 +55,7 @@ class Simulation(object):
event.wait()
with lzma.open('saved_simulation', 'wb') as f:
dill.dump(fdfd_tools.unvec(sim.E.get(), grid.shape), f)
dill.dump(meanas.fdmath.unvec(sim.E.get(), grid.shape), f)
Code in the form
event2 = sim.update_H([event0, event1])
@ -56,70 +63,70 @@ class Simulation(object):
event0 and event1 to occur (i.e. previous operations to finish) before starting execution.
event2 can then be used to prepare further operations to be run after update_H.
"""
E = None # type: pyopencl.array.Array
H = None # type: pyopencl.array.Array
S = None # type: pyopencl.array.Array
eps = None # type: pyopencl.array.Array
dt = None # type: float
inv_dxes = None # type: List[pyopencl.array.Array]
E: pyopencl.array.Array
H: pyopencl.array.Array
S: pyopencl.array.Array
eps: pyopencl.array.Array
dt: float
inv_dxes: list[pyopencl.array.Array]
arg_type = None # type: numpy.float32 or numpy.float64
arg_type: type
context = None # type: pyopencl.Context
queue = None # type: pyopencl.CommandQueue
context: pyopencl.Context
queue: pyopencl.CommandQueue
update_E = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
update_H = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
update_S = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
update_J = None # type: Callable[[List[pyopencl.Event]], pyopencl.Event]
sources = None # type: Dict[str, str]
update_E: Callable[[list[pyopencl.Event]], pyopencl.Event]
update_H: Callable[[list[pyopencl.Event]], pyopencl.Event]
update_S: Callable[[list[pyopencl.Event]], pyopencl.Event]
update_J: Callable[[list[pyopencl.Event]], pyopencl.Event]
sources: dict[str, str]
def __init__(self,
epsilon: List[numpy.ndarray],
pmls: List[Dict[str, int or float]],
bloch_boundaries: List[Dict[str, int or float]] = (),
dxes: List[List[numpy.ndarray]] or float = None,
dt: float = None,
initial_fields: Dict[str, List[numpy.ndarray]] = None,
context: pyopencl.Context = None,
queue: pyopencl.CommandQueue = None,
float_type: numpy.float32 or numpy.float64 = numpy.float32,
do_poynting: bool = True,
do_fieldsrc: bool = False):
def __init__(
self,
epsilon: NDArray,
pmls: Sequence[dict[str, float]],
bloch_boundaries: Sequence[dict[str, float]] = (),
dxes: list[list[NDArray]] | float | None = None,
dt: float | None = None,
initial_fields: dict[str, NDArray] | None = None,
context: pyopencl.Context | None = None,
queue: pyopencl.CommandQueue | None = None,
float_type: type = numpy.float32,
do_poynting: bool = True,
do_fieldsrc: bool = False,
) -> None:
"""
Initialize the simulation.
:param epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
Args:
epsilon: List containing [eps_r,xx, eps_r,yy, eps_r,zz], where each element is a Yee-shifted ndarray
spanning the simulation domain. Relative epsilon is used.
:param pmls: List of dicts with keys:
'axis': One of 'x', 'y', 'z'.
'direction': One of 'n', 'p'.
'thickness': Number of layers, default 8.
'epsilon_eff': Effective epsilon to match to. Default 1.0.
'mu_eff': Effective mu to match to. Default 1.0.
'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
'm': Polynomial grading exponent. Default 3.5.
'ma': Exponent for alpha. Default 1.
:param bloch_boundaries: List of dicts with keys:
'axis': One of 'x', 'y', 'z'.
'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
:param dt: Time step. Default is min(dxes) * .99/sqrt(3).
:param initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
specified fields (default is 0 everywhere). Fields have same format as epsilon.
:param context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
:param queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
:param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
:param do_poynting: If true, enables calculation of the poynting vector, S.
pmls: List of dicts with keys:
'axis': One of 'x', 'y', 'z'.
'direction': One of 'n', 'p'.
'thickness': Number of layers, default 8.
'epsilon_eff': Effective epsilon to match to. Default 1.0.
'mu_eff': Effective mu to match to. Default 1.0.
'ln_R_per_layer': Desired (ln(R) / thickness) value. Default -1.6.
'm': Polynomial grading exponent. Default 3.5.
'ma': Exponent for alpha. Default 1.
bloch_boundaries: List of dicts with keys:
'axis': One of 'x', 'y', 'z'.
'real': Real part of bloch phase factor (i.e. real(exp(i * phase)))
'imag': Imaginary part of bloch phase factor (i.e. imag(exp(i * phase)))
dt: Time step. Default is min(dxes) * .99/sqrt(3).
initial_fields: Dict with optional keys ('E', 'H', 'F', 'G') containing initial values for the
specified fields (default is 0 everywhere). Fields have same format as epsilon.
context: pyOpenCL context. If not given, pyopencl.create_some_context(False) is called.
queue: pyOpenCL command queue. If not given, pyopencl.CommandQueue(context) is called.
float_type: numpy.float32 or numpy.float64. Default numpy.float32.
do_poynting: If True, enables calculation of the poynting vector, S.
Poynting vector calculation adds the following computational burdens:
* During update_H, ~6 extra additions/cell are performed in order to spatially
average E and temporally average H. These quantities are multiplied
(6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
* update_S performs a discrete cross product using the precalculated products
from update_H. This is not nice to the cache and similar to e.g. update_E
in complexity.
* GPU memory requirements are approximately doubled, since S and the intermediate
products must be stored.
* During update_H, 12 extra additions/cell are performed in order to temporally
sum E and H. The results are then multiplied by E (6 multiplications/cell) and
then stored (6 writes/cell, cache-friendly). The E-field components are
reused from the H-field update and do not require additional H
* GPU memory requirements increase by 50% (for storing S)
"""
if initial_fields is None:
initial_fields = {}
@ -133,7 +140,7 @@ class Simulation(object):
if dxes is None:
dxes = 1.0
if isinstance(dxes, (float, int)):
if isinstance(dxes, float | int):
uniform_dx = dxes
min_dx = dxes
else:
@ -142,13 +149,14 @@ class Simulation(object):
min_dx = min(min(dxn) for dxn in dxes[0] + dxes[1])
max_dt = min_dx * .99 / numpy.sqrt(3)
logger.info(f'{min_dx=}, {max_dt=}, {dt=}')
if dt is None:
self.dt = max_dt
elif dt > max_dt:
warnings.warn('Warning: unstable dt: {}'.format(dt))
warnings.warn(f'Warning: unstable dt: {dt}', stacklevel=2)
elif dt <= 0:
raise Exception('Invalid dt: {}'.format(dt))
raise FDTDError(f'Invalid dt: {dt}')
else:
self.dt = dt
@ -170,42 +178,46 @@ class Simulation(object):
ctype = type_to_C(self.arg_type)
def ptr(arg: str) -> str:
return ctype + ' * restrict ' + arg
return ctype + ' *' + arg
base_fields = OrderedDict()
base_fields[ptr('E')] = self.E
base_fields[ptr('H')] = self.H
base_fields[ctype + ' dt'] = self.dt
if uniform_dx == False:
base_fields = {
ptr('E'): self.E,
ptr('H'): self.H,
ctype + ' dt': self.dt,
}
if uniform_dx is False:
inv_dx_names = ['inv_d' + eh + r for eh in 'eh' for r in 'xyz']
for name, field in zip(inv_dx_names, self.inv_dxes):
for name, field in zip(inv_dx_names, self.inv_dxes, strict=True):
base_fields[ptr(name)] = field
eps_field = OrderedDict()
eps_field[ptr('eps')] = self.eps
eps_field = {ptr('eps'): self.eps}
if bloch_boundaries:
base_fields[ptr('F')] = self.F
base_fields[ptr('G')] = self.G
base_fields |= {
ptr('F'): self.F,
ptr('G'): self.G,
}
bloch_fields = OrderedDict()
bloch_fields[ptr('E')] = self.F
bloch_fields[ptr('H')] = self.G
bloch_fields[ctype + ' dt'] = self.dt
bloch_fields[ptr('F')] = self.E
bloch_fields[ptr('G')] = self.H
bloch_fields = {
ptr('E'): self.F,
ptr('H'): self.G,
ctype + ' dt': self.dt,
ptr('F'): self.E,
ptr('G'): self.H,
}
common_source = jinja_env.get_template('common.cl').render(
ftype=ctype,
shape=self.shape,
)
ftype=ctype,
shape=self.shape,
)
jinja_args = {
'common_header': common_source,
'pmls': pmls,
'do_poynting': do_poynting,
'bloch': bloch_boundaries,
'uniform_dx': uniform_dx,
}
'common_header': common_source,
'pmls': pmls,
'do_poynting': do_poynting,
'do_poynting_halves': do_poynting_halves,
'bloch': bloch_boundaries,
'uniform_dx': uniform_dx,
}
E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
self.sources['E'] = E_source
@ -214,27 +226,28 @@ class Simulation(object):
if bloch_boundaries:
bloch_args = jinja_args.copy()
bloch_args['do_poynting'] = False
bloch_args['bloch'] = [{'axis': b['axis'],
'real': b['imag'],
'imag': b['real']}
for b in bloch_boundaries]
bloch_args['bloch'] = [{
'axis': b['axis'],
'real': b['imag'],
'imag': b['real'],
}
for b in bloch_boundaries]
F_source = jinja_env.get_template('update_e.cl').render(**bloch_args)
G_source = jinja_env.get_template('update_h.cl').render(**bloch_args)
self.sources['F'] = F_source
self.sources['G'] = G_source
S_fields = OrderedDict()
S_fields = {}
if do_poynting:
S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
self.sources['S'] = S_source
self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
self.S = pyopencl.array.zeros_like(self.E)
S_fields[ptr('oS')] = self.oS
S_fields[ptr('S')] = self.S
if do_poynting_halves:
self.S0 = pyopencl.array.zeros_like(self.E)
self.S1 = pyopencl.array.zeros_like(self.E)
S_fields[ptr('S0')] = self.S0
S_fields[ptr('S1')] = self.S1
J_fields = OrderedDict()
J_fields = {}
if do_fieldsrc:
J_source = jinja_env.get_template('update_j.cl').render(**jinja_args)
self.sources['J'] = J_source
@ -244,40 +257,36 @@ class Simulation(object):
J_fields[ptr('Jr')] = self.Jr
J_fields[ptr('Ji')] = self.Ji
'''
PML
'''
#
# PML
#
pml_e_fields, pml_h_fields = self._create_pmls(pmls)
if bloch_boundaries:
pml_f_fields, pml_g_fields = self._create_pmls(pmls)
'''
Create operations
'''
#
# Create operations
#
self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
if do_poynting:
self.update_S = self._create_operation(S_source, (base_fields, S_fields))
if bloch_boundaries:
self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))
if do_fieldsrc:
args = OrderedDict()
[args.update(d) for d in (base_fields, J_fields)]
args = base_fields | J_fields
var_args = [ctype + ' ' + v for v in 'cs'] + ['uint ' + r + m for r in 'xyz' for m in ('min', 'max')]
update = ElementwiseKernel(self.context, operation=J_source,
arguments=', '.join(list(args.keys()) + var_args))
self.update_J = lambda e, *a: update(*args.values(), *a, wait_for=e)
def _create_pmls(self, pmls):
def _create_pmls(self, pmls: Sequence[dict[str, float]]) -> tuple[dict[str, pyopencl.array.Array], ...]:
ctype = type_to_C(self.arg_type)
def ptr(arg: str) -> str:
return ctype + ' *' + arg
pml_e_fields = OrderedDict()
pml_h_fields = OrderedDict()
pml_e_fields = {}
pml_h_fields = {}
for pml in pmls:
a = 'xyz'.find(pml['axis'])
@ -285,7 +294,9 @@ class Simulation(object):
kappa_max = numpy.sqrt(pml['mu_eff'] * pml['epsilon_eff'])
alpha_max = pml['cfs_alpha']
def par(x):
print(sigma_max, kappa_max, alpha_max, pml['thickness'], self.dt)
def par(x, pml=pml, sigma_max=sigma_max, kappa_max=kappa_max, alpha_max=alpha_max): # noqa: ANN001, ANN202
scaling = (x / pml['thickness']) ** pml['m']
sigma = scaling * sigma_max
kappa = 1 + scaling * (kappa_max - 1)
@ -301,8 +312,13 @@ class Simulation(object):
elif pml['polarity'] == 'n':
xh -= 0.5
logger.debug(f'{pml=}')
logger.debug(f'{xe=}')
logger.debug(f'{xh=}')
logger.debug(f'{par(xe)=}')
logger.debug(f'{par(xh)=}')
pml_p_names = [['p' + pml['axis'] + i + eh + pml['polarity'] for i in '012'] for eh in 'eh']
for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh)):
for name_e, name_h, pe, ph in zip(pml_p_names[0], pml_p_names[1], par(xe), par(xh), strict=True):
pml_e_fields[ptr(name_e)] = pyopencl.array.to_device(self.queue, pe)
pml_h_fields[ptr(name_h)] = pyopencl.array.to_device(self.queue, ph)
@ -313,64 +329,87 @@ class Simulation(object):
psi_shape = list(self.shape)
psi_shape[a] = pml['thickness']
for ne, nh in zip(*psi_names):
for ne, nh in zip(*psi_names, strict=True):
pml_e_fields[ptr(ne)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
pml_h_fields[ptr(nh)] = pyopencl.array.zeros(self.queue, tuple(psi_shape), dtype=self.arg_type)
return pml_e_fields, pml_h_fields
def _create_operation(self, source, args_fields):
args = OrderedDict()
[args.update(d) for d in args_fields]
update = ElementwiseKernel(self.context, operation=source,
arguments=', '.join(args.keys()))
def _create_operation(self, source: str, args_fields: Sequence[dict[str, pyopencl.array.Array]]) -> Callable[..., pyopencl.Event]:
args = {}
for d in args_fields:
args.update(d)
update = ElementwiseKernel(
self.context,
operation=source,
arguments=', '.join(args.keys()),
)
return lambda e: update(*args.values(), wait_for=e)
def _create_context(self, context: pyopencl.Context = None,
queue: pyopencl.CommandQueue = None):
if context is None:
self.context = pyopencl.create_some_context()
else:
self.context = context
def _create_context(
self,
context: pyopencl.Context | None = None,
queue: pyopencl.CommandQueue | None = None,
) -> None:
self.context = context or pyopencl.create_some_context()
self.queue = queue or pyopencl.CommandQueue(self.context)
if queue is None:
self.queue = pyopencl.CommandQueue(self.context)
else:
self.queue = queue
def _create_eps(self, epsilon: List[numpy.ndarray]):
def _create_eps(self, epsilon: NDArray) -> pyopencl.array.Array:
if len(epsilon) != 3:
raise Exception('Epsilon must be a list with length of 3')
if not all((e.shape == epsilon[0].shape for e in epsilon[1:])):
raise Exception('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
raise FDTDError('Epsilon must be a list with length of 3')
if not all(e.shape == epsilon[0].shape for e in epsilon[1:]):
raise FDTDError('All epsilon grids must have the same shape. Shapes are {}', [e.shape for e in epsilon])
if not epsilon[0].shape == self.shape:
raise Exception('Epsilon shape mismatch. Expected {}, got {}'.format(self.shape, epsilon[0].shape))
raise FDTDError(f'Epsilon shape mismatch. Expected {self.shape}, got {epsilon[0].shape}')
self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(self.arg_type))
def _create_field(self, initial_value: List[numpy.ndarray] = None):
def _create_field(self, initial_value: NDArray | None = None) -> pyopencl.array.Array:
if initial_value is None:
return pyopencl.array.zeros_like(self.eps)
else:
if len(initial_value) != 3:
Exception('Initial field value must be a list of length 3')
if not all((f.shape == self.shape for f in initial_value)):
Exception('Initial field list elements must have same shape as epsilon elements')
return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
if len(initial_value) != 3:
raise FDTDError('Initial field value must be a list of length 3')
if not all(f.shape == self.shape for f in initial_value):
raise FDTDError('Initial field list elements must have same shape as epsilon elements')
return pyopencl.array.to_device(self.queue, vec(initial_value).astype(self.arg_type))
def type_to_C(float_type) -> str:
def type_to_C(
float_type: type,
) -> str:
"""
Returns a string corresponding to the C equivalent of a numpy type.
Only works for float16, float32, float64.
:param float_type: e.g. numpy.float32
:return: string containing the corresponding C type (eg. 'double')
Args:
float_type: e.g. numpy.float32
Returns:
string containing the corresponding C type (eg. 'double')
"""
if float_type == numpy.float16:
arg_type = 'half'
elif float_type == numpy.float32:
arg_type = 'float'
elif float_type == numpy.float64:
arg_type = 'double'
else:
raise Exception('Unsupported type')
return arg_type
types = {
numpy.float16: 'half',
numpy.float32: 'float',
numpy.float64: 'double',
numpy.complex64: 'cfloat_t',
numpy.complex128: 'cdouble_t',
}
if float_type not in types:
raise FDTDError(f'Unsupported type: {float_type}')
return types[float_type]
# def par(x):
# scaling = ((x / (pml['thickness'])) ** pml['m'])
# print('scaling', scaling)
# print('sigma_max * dt / 2', sigma_max * self.dt / 2)
# print('kappa_max', kappa_max)
# print('m', pml['m'])
# sigma = scaling * sigma_max
# kappa = 1 + scaling * (kappa_max - 1)
# alpha = ((1 - x / pml['thickness']) ** pml['ma']) * alpha_max
# p0 = 1/(1 + self.dt * (alpha + sigma / kappa))
# p1 = self.dt * sigma / kappa * p0
# p2 = 1/kappa
# print(p0.min(), p0.max(), p1.min(), p1.max())
# return p0, p1, p2
#
#
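A short hedged usage sketch (not part of this diff): type_to_C maps a numpy scalar type to its OpenCL C name, which Simulation uses to assemble the kernel argument declarations joined into each ElementwiseKernel's argument string.

import numpy
from opencl_fdtd import type_to_C

ctype = type_to_C(numpy.float32)   # 'float'; numpy.float64 gives 'double'

def ptr(arg: str) -> str:
    # same declaration pattern used in Simulation.__init__ and _create_pmls
    return ctype + ' *' + arg

print(ptr('E'), ptr('H'), ctype + ' dt')   # float *E float *H float dt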

test_simulation.py

@ -0,0 +1,382 @@
import unittest
import numpy
from opencl_fdtd import Simulation
from meanas import fdtd
class BasicTests():
def test_initial_fields(self):
# Make sure initial fields didn't change
e0 = self.es[0]
h0 = self.hs[0]
mask = self.src_mask
self.assertEqual(e0[mask], self.j_mag / self.epsilon[mask])
self.assertFalse(e0[~mask].any())
self.assertFalse(h0.any())
def test_initial_energy(self):
e0 = self.es[0]
h0 = self.hs[0]
h1 = self.hs[1]
mask = self.src_mask[1]
dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in e0.shape[1:]) for _ in range(2))
dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
u0 = self.j_mag * self.j_mag / self.epsilon[self.src_mask] * dV[mask]
args = {
'dxes': self.dxes,
'epsilon': self.epsilon,
}
# Make sure initial energy and E dot J are correct
energy0 = fdtd.energy_estep(h0=h0, e1=e0, h2=self.hs[1], **args)
e_dot_j_0 = fdtd.delta_energy_j(j0=(e0 - 0) * self.epsilon, e1=e0, dxes=self.dxes)
self.assertTrue(numpy.allclose(energy0[mask], u0))
self.assertFalse(energy0[~mask].any(), msg=f'{energy0=}')
self.assertTrue(numpy.allclose(e_dot_j_0[mask], u0))
self.assertFalse(e_dot_j_0[~mask].any(), msg=f'{e_dot_j_0=}')
def test_energy_conservation(self):
e0 = self.es[0]
u0 = fdtd.delta_energy_j(j0=(e0 - 0) * self.epsilon, e1=e0, dxes=self.dxes).sum()
args = {'dxes': self.dxes,
'epsilon': self.epsilon}
for ii in range(1, 8):
with self.subTest(i=ii):
u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
self.assertTrue(numpy.allclose(u_hstep.sum(), u0), msg=f'u_hstep: {u_hstep.sum()}\n{numpy.moveaxis(u_hstep, -1, 0)}')
self.assertTrue(numpy.allclose(u_estep.sum(), u0), msg=f'u_estep: {u_estep.sum()}\n{numpy.moveaxis(u_estep, -1, 0)}')
def test_poynting(self):
for ii in range(1, 3):
with self.subTest(i=ii):
s = fdtd.poynting(e=self.es[ii], h=self.hs[ii+1] + self.hs[ii])
sf = numpy.moveaxis(s, -1, 0)
ss = numpy.moveaxis(self.ss[ii], -1, 0)
self.assertTrue(numpy.allclose(s, self.ss[ii], rtol=1e-4),
msg=f'From ExH:\n{sf}\nFrom sim.S:\n{ss}')
def test_poynting_divergence(self):
args = {
'dxes': self.dxes,
'epsilon': self.epsilon,
}
dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
u_eprev = None
for ii in range(1, 8):
with self.subTest(i=ii):
u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
du_half_h2e = u_estep - u_hstep
div_s_h2e = self.dt * fdtd.poynting_divergence(e=self.es[ii], h=self.hs[ii], dxes=self.dxes) * dV
du_half_h2e_f = numpy.moveaxis(du_half_h2e, -1, 0)
div_s_h2e_f = -numpy.moveaxis(div_s_h2e, -1, 0)
self.assertTrue(numpy.allclose(du_half_h2e, -div_s_h2e, rtol=1e-4),
msg=f'du_half_h2e\n{du_half_h2e_f}\ndiv_s_h2e\n{div_s_h2e_f}')
if u_eprev is None:
u_eprev = u_estep
continue
# previous half-step
du_half_e2h = u_hstep - u_eprev
div_s_e2h = self.dt * fdtd.poynting_divergence(e=self.es[ii-1], h=self.hs[ii], dxes=self.dxes) * dV
du_half_e2h_f = numpy.moveaxis(du_half_e2h, -1, 0)
div_s_e2h_f = -numpy.moveaxis(div_s_e2h, -1, 0)
self.assertTrue(numpy.allclose(du_half_e2h, -div_s_e2h, rtol=1e-4),
msg=f'du_half_e2h\n{du_half_e2h_f}\ndiv_s_e2h\n{div_s_e2h_f}')
u_eprev = u_estep
def test_poynting_planes(self):
args = {
'dxes': self.dxes,
'epsilon': self.epsilon,
}
dxes = self.dxes if self.dxes is not None else tuple(tuple(numpy.ones(s) for s in self.epsilon.shape[1:]) for _ in range(2))
dV = numpy.prod(numpy.meshgrid(*dxes[0], indexing='ij'), axis=0)
mx = numpy.roll(self.src_mask, (-1, -1), axis=(0, 1))
my = numpy.roll(self.src_mask, -1, axis=2)
mz = numpy.roll(self.src_mask, (+1, -1), axis=(0, 3))
px = numpy.roll(self.src_mask, -1, axis=0)
py = self.src_mask.copy()
pz = numpy.roll(self.src_mask, +1, axis=0)
u_eprev = None
for ii in range(1, 8):
with self.subTest(i=ii):
u_hstep = fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args)
u_estep = fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args)
s_h2e = -fdtd.poynting(e=self.es[ii], h=self.hs[ii]) * self.dt
s_h2e[0] *= dxes[0][1][None, :, None] * dxes[0][2][None, None, :]
s_h2e[1] *= dxes[0][0][:, None, None] * dxes[0][2][None, None, :]
s_h2e[2] *= dxes[0][0][:, None, None] * dxes[0][1][None, :, None]
planes = [s_h2e[px].sum(), -s_h2e[mx].sum(),
s_h2e[py].sum(), -s_h2e[my].sum(),
s_h2e[pz].sum(), -s_h2e[mz].sum()]
du = (u_estep - u_hstep)[self.src_mask[1]]
self.assertTrue(numpy.allclose(sum(planes), (u_estep - u_hstep)[self.src_mask[1]]),
msg=f'planes: {planes} (sum: {sum(planes)})\n du:\n {du}')
if u_eprev is None:
u_eprev = u_estep
continue
s_e2h = -fdtd.poynting(e=self.es[ii - 1], h=self.hs[ii]) * self.dt
s_e2h[0] *= dxes[0][1][None, :, None] * dxes[0][2][None, None, :]
s_e2h[1] *= dxes[0][0][:, None, None] * dxes[0][2][None, None, :]
s_e2h[2] *= dxes[0][0][:, None, None] * dxes[0][1][None, :, None]
planes = [s_e2h[px].sum(), -s_e2h[mx].sum(),
s_e2h[py].sum(), -s_e2h[my].sum(),
s_e2h[pz].sum(), -s_e2h[mz].sum()]
du = (u_hstep - u_eprev)[self.src_mask[1]]
self.assertTrue(numpy.allclose(sum(planes), (u_hstep - u_eprev)[self.src_mask[1]]),
msg=f'planes: {du} (sum: {sum(planes)})\n du:\n {du}')
# previous half-step
u_eprev = u_estep
class Basic2DNoDXOnlyVacuum(unittest.TestCase, BasicTests):
def setUp(self):
shape = [3, 5, 5, 1]
self.dt = 0.5
self.epsilon = numpy.ones(shape, dtype=float)
self.j_mag = 32
self.dxes = None
self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
self.src_mask[1, 2, 2, 0] = True
e = numpy.zeros_like(self.epsilon)
h = numpy.zeros_like(self.epsilon)
e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
self.es = [e]
self.hs = [h]
self.ss = []
sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
initial_fields={'E': e, 'H': h}, do_poynting=True)
for _ in range(9):
e = e.copy()
h = h.copy()
sim.update_H([]).wait()
sim.update_E([]).wait()
self.es.append(sim.E.get().reshape(shape))
self.hs.append(sim.H.get().reshape(shape))
self.ss.append(sim.S.get().reshape(shape))
class Basic2DUniformDX3(unittest.TestCase, BasicTests):
def setUp(self):
shape = [3, 5, 1, 5]
self.dt = 0.5
self.j_mag = 32
self.dxes = tuple(tuple(numpy.full(s, 2.0) for s in shape[1:]) for _ in range(2))
self.src_mask = numpy.zeros(shape, dtype=bool)
self.src_mask[1, 2, 0, 2] = True
self.epsilon = numpy.full(shape, 1, dtype=float)
self.epsilon[self.src_mask] = 2
e = numpy.zeros_like(self.epsilon)
h = numpy.zeros_like(self.epsilon)
e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
self.es = [e]
self.hs = [h]
self.ss = []
sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
initial_fields={'E': e, 'H': h}, do_poynting=True)
for _ in range(9):
e = e.copy()
h = h.copy()
sim.update_H([]).wait()
sim.update_E([]).wait()
self.es.append(sim.E.get().reshape(shape))
self.hs.append(sim.H.get().reshape(shape))
self.ss.append(sim.S.get().reshape(shape))
class Basic3DUniformDXOnlyVacuum(unittest.TestCase, BasicTests):
def setUp(self):
shape = [3, 5, 5, 5]
self.dt = 0.5
self.epsilon = numpy.ones(shape, dtype=float)
self.j_mag = 32
self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
self.src_mask[1, 2, 2, 2] = True
e = numpy.zeros_like(self.epsilon)
h = numpy.zeros_like(self.epsilon)
e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
self.es = [e]
self.hs = [h]
self.ss = []
sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
initial_fields={'E': e, 'H': h}, do_poynting=True)
for _ in range(9):
e = e.copy()
h = h.copy()
sim.update_H([]).wait()
sim.update_E([]).wait()
self.es.append(sim.E.get().reshape(shape))
self.hs.append(sim.H.get().reshape(shape))
self.ss.append(sim.S.get().reshape(shape))
class Basic3DUniformDXUniformN(unittest.TestCase, BasicTests):
def setUp(self):
shape = [3, 5, 5, 5]
self.dt = 0.5
self.epsilon = numpy.full(shape, 2.5, dtype=float)
self.j_mag = 32
self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
self.src_mask = numpy.zeros_like(self.epsilon, dtype=bool)
self.src_mask[1, 2, 2, 2] = True
e = numpy.zeros_like(self.epsilon)
h = numpy.zeros_like(self.epsilon)
e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
self.es = [e]
self.hs = [h]
self.ss = []
sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
initial_fields={'E': e, 'H': h}, do_poynting=True)
for _ in range(9):
e = e.copy()
h = h.copy()
sim.update_H([]).wait()
sim.update_E([]).wait()
self.es.append(sim.E.get().reshape(shape))
self.hs.append(sim.H.get().reshape(shape))
self.ss.append(sim.S.get().reshape(shape))
class Basic3DUniformDX(unittest.TestCase, BasicTests):
def setUp(self):
shape = [3, 5, 5, 5]
self.dt = 0.33
self.j_mag = 32
self.dxes = tuple(tuple(numpy.ones(s) for s in shape[1:]) for _ in range(2))
self.src_mask = numpy.zeros(shape, dtype=bool)
self.src_mask[1, 2, 2, 2] = True
self.epsilon = numpy.full(shape, 1, dtype=float)
self.epsilon[self.src_mask] = 2
e = numpy.zeros_like(self.epsilon)
h = numpy.zeros_like(self.epsilon)
e[self.src_mask] = self.j_mag / self.epsilon[self.src_mask]
self.es = [e]
self.hs = [h]
self.ss = []
sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
initial_fields={'E': e, 'H': h}, do_poynting=True)
for _ in range(9):
e = e.copy()
h = h.copy()
sim.update_H([]).wait()
sim.update_E([]).wait()
self.es.append(sim.E.get().reshape(shape))
self.hs.append(sim.H.get().reshape(shape))
self.ss.append(sim.S.get().reshape(shape))
class JdotE_3DUniformDX(unittest.TestCase):
def setUp(self):
shape = [3, 5, 5, 5]
self.dt = 0.5
self.j_mag = 32
self.dxes = tuple(tuple(numpy.full(s, 2.0) for s in shape[1:]) for _ in range(2))
self.src_mask = numpy.zeros(shape, dtype=bool)
self.src_mask[1, 2, 2, 2] = True
self.epsilon = numpy.full(shape, 4, dtype=float)
self.epsilon[self.src_mask] = 2
rng = numpy.random.default_rng()
e = rng.integers(-128, 128 + 1, size=shape).astype(float)
h = rng.integers(-128, 128 + 1, size=shape).astype(float)
self.es = [e]
self.hs = [h]
self.ss = []
sim = Simulation(epsilon=self.epsilon, pmls=[], dt=self.dt, dxes=self.dxes,
initial_fields={'E': e, 'H': h}, do_poynting=True, float_type=numpy.float64)
eh2h = fdtd.maxwell_h(dt=self.dt, dxes=self.dxes)
eh2e = fdtd.maxwell_e(dt=self.dt, dxes=self.dxes)
for ii in range(9):
e = e.copy()
h = h.copy()
sim.update_H([]).wait()
sim.update_E([]).wait()
if ii == 1:
nn = numpy.where(self.src_mask.flat)[0][0]
self.e_before = sim.E.get().reshape(shape)
sim.E[nn] += self.j_mag / self.epsilon[self.src_mask][0]
self.e_after = sim.E.get().reshape(shape)
self.j_dot_e = self.j_mag * e[self.src_mask]
self.hs.append(sim.H.get().reshape(shape))
self.es.append(sim.E.get().reshape(shape))
self.ss.append(sim.S.get().reshape(shape))
def test_j_dot_e(self):
h0 = self.hs[2]
e0 = self.es[1]
e1 = self.es[2]
j0 = numpy.zeros_like(e0)
j0[self.src_mask] = self.j_mag
args = {
'dxes': self.dxes,
'epsilon': self.epsilon,
}
e2h = fdtd.maxwell_h(dt=self.dt, dxes=self.dxes)
#ee = j0 * (2 * e0 - j0)
#hh = h0 * e2h(j0, numpy.zeros_like(h0))
#u0 = fdtd.dxmul(ee, hh, **args)
u0 = fdtd.delta_energy_j(j0=j0, e1=(e0 + e1), dxes=self.dxes)
#uh = [fdtd.energy_hstep(e0=self.es[ii-1], h1=self.hs[ii], e2=self.es[ii], **args) for ii in range(1, 9)]
#ue = [fdtd.energy_estep(h0=self.hs[ii], e1=self.es[ii], h2=self.hs[ii + 1], **args) for ii in range(8)]
#uht = [uu.sum() for uu in uh]
#uet = [uu.sum() for uu in ue]
u_hstep = fdtd.energy_hstep(e0=self.es[0], h1=self.hs[1], e2=self.es[1], **args)
u_estep = fdtd.energy_estep(h0=self.hs[-2], e1=self.es[-2], h2=self.hs[-1], **args)
#breakpoint()
du = (u_estep - u_hstep).sum()
self.assertTrue(numpy.allclose(u0.sum(), (u_estep - u_hstep).sum()), msg=f'{u0.sum()} != {du}')
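The energy-balance assertions above check a discrete analogue of Poynting's theorem. As a rough continuous-form reminder (the meanas.fdtd helpers work with a staggered-grid version that additionally carries the dt, cell-volume, and temporal-averaging factors seen in these tests):

\frac{\partial u}{\partial t} + \nabla \cdot \vec{S} = -\vec{J} \cdot \vec{E},
\qquad \vec{S} = \vec{E} \times \vec{H},
\qquad u = \tfrac{1}{2}\left(\epsilon |\vec{E}|^{2} + \mu |\vec{H}|^{2}\right)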

213
pcgen.py

@ -1,213 +0,0 @@
"""
Routines for creating normalized 2D lattices and common photonic crystal
cavity designs.
"""
from typing import List
import numpy
def triangular_lattice(dims: List[int],
asymmetrical: bool=False
) -> numpy.ndarray:
"""
Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
a triangular lattice in 2D. The lattice will be centered around (0, 0),
unless asymmetrical=True in which case there will be extra holes in the +x
direction.
:param dims: Number of lattice sites in the [x, y] directions.
:param asymmetrical: If true, each row in x will contain the same number of
lattice sites. If false, the structure is symmetrical around (0, 0).
:return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
"""
dims = numpy.array(dims, dtype=int)
if asymmetrical:
k = 0
else:
k = 1
positions = []
ymax = (dims[1] - 1)/2
for j in numpy.linspace(-ymax, ymax, dims[0]):
j_odd = numpy.floor(j) % 2
x_offset = j_odd * 0.5
y_offset = j * numpy.sqrt(3)/2
num_x = dims[0] - k * j_odd
xmax = (dims[0] - 1)/2
xs = numpy.linspace(-xmax, xmax - k * j_odd, num_x) + x_offset
ys = numpy.full_like(xs, y_offset)
positions += [numpy.vstack((xs, ys)).T]
xy = numpy.vstack(tuple(positions))
return xy[xy[:, 0].argsort(), ]
def square_lattice(dims: List[int]) -> numpy.ndarray:
"""
Return an ndarray of [[x0, y0], [x1, y1], ...] denoting lattice sites for
a square lattice in 2D. The lattice will be centered around (0, 0).
:param dims: Number of lattice sites in the [x, y] directions.
:return: [[x0, y0], [x1, 1], ...] denoting lattice sites.
"""
xs, ys = numpy.meshgrid(range(dims[0]), range(dims[1]), 'xy')
xs -= dims[0]/2
ys -= dims[1]/2
xy = numpy.vstack((xs.flatten(), ys.flatten())).T
return xy[xy[:, 0].argsort(), ]
# ### Photonic crystal functions ###
def nanobeam_holes(a_defect: float,
num_defect_holes: int,
num_mirror_holes: int
) -> numpy.ndarray:
"""
Returns a list of [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
Creates a region in which the lattice constant and radius are progressively
(linearly) altered over num_defect_holes holes until they reach the value
specified by a_defect, then symmetrically returned to a lattice constant and
radius of 1, which is repeated num_mirror_holes times on each side.
:param a_defect: Minimum lattice constant for the defect, as a fraction of the
mirror lattice constant (ie., for no defect, a_defect = 1).
:param num_defect_holes: How many holes form the defect (per-side)
:param num_mirror_holes: How many holes form the mirror (per-side)
:return: Ndarray [[x0, r0], [x1, r1], ...] of nanobeam hole positions and radii.
"""
a_values = numpy.linspace(a_defect, 1, num_defect_holes, endpoint=False)
xs = a_values.cumsum() - (a_values[0] / 2) # Later mirroring makes center distance 2x as long
mirror_xs = numpy.arange(1, num_mirror_holes + 1) + xs[-1]
mirror_rs = numpy.ones_like(mirror_xs)
return numpy.vstack((numpy.hstack((-mirror_xs[::-1], -xs[::-1], xs, mirror_xs)),
numpy.hstack((mirror_rs[::-1], a_values[::-1], a_values, mirror_rs)))).T
def ln_defect(mirror_dims: List[int], defect_length: int) -> numpy.ndarray:
"""
N-hole defect in a triangular lattice.
:param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
is 2 * n + 1 in each direction.
:param defect_length: Length of defect. Should be an odd number.
:return: [[x0, y0], [x1, y1], ...] for all the holes
"""
if defect_length % 2 != 1:
raise Exception('defect_length must be odd!')
p = triangular_lattice([2 * d + 1 for d in mirror_dims])
half_length = numpy.floor(defect_length / 2)
hole_nums = numpy.arange(-half_length, half_length + 1)
holes_to_keep = numpy.in1d(p[:, 0], hole_nums, invert=True)
return p[numpy.logical_or(holes_to_keep, p[:, 1] != 0), ]
def ln_shift_defect(mirror_dims: List[int],
defect_length: int,
shifts_a: List[float]=(0.15, 0, 0.075),
shifts_r: List[float]=(1, 1, 1)
) -> numpy.ndarray:
"""
N-hole defect with shifted holes (intended to give the mode a gaussian profile
in real- and k-space so as to improve both Q and confinement). Holes along the
defect line are shifted and altered according to the shifts_* parameters.
:param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
is 2 * n + 1 in each direction.
:param defect_length: Length of defect. Should be an odd number.
:param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
:param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
:return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
"""
if not hasattr(shifts_a, "__len__") and shifts_a is not None:
shifts_a = [shifts_a]
if not hasattr(shifts_r, "__len__") and shifts_r is not None:
shifts_r = [shifts_r]
xy = ln_defect(mirror_dims, defect_length)
# Add column for radius
xyr = numpy.hstack((xy, numpy.ones((xy.shape[0], 1))))
# Shift holes
# Expand shifts as necessary
n_shifted = max(len(shifts_a), len(shifts_r))
tmp_a = numpy.array(shifts_a)
shifts_a = numpy.ones((n_shifted, ))
shifts_a[:len(tmp_a)] = tmp_a
tmp_r = numpy.array(shifts_r)
shifts_r = numpy.ones((n_shifted, ))
shifts_r[:len(tmp_r)] = tmp_r
x_removed = numpy.floor(defect_length / 2)
for ind in range(n_shifted):
for sign in (-1, 1):
x_val = sign * (x_removed + ind + 1)
which = numpy.logical_and(xyr[:, 0] == x_val, xyr[:, 1] == 0)
xyr[which, ] = (x_val + numpy.sign(x_val) * shifts_a[ind], 0, shifts_r[ind])
return xyr
def r6_defect(mirror_dims: List[int]) -> numpy.ndarray:
"""
R6 defect in a triangular lattice.
:param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
is 2 * n + 1 in each direction.
:return: [[x0, y0], [x1, y1], ...] specifying hole centers.
"""
xy = triangular_lattice([2 * d + 1 for d in mirror_dims])
rem_holes_plus = numpy.array([[1, 0],
[0.5, +numpy.sqrt(3)/2],
[0.5, -numpy.sqrt(3)/2]])
rem_holes = numpy.vstack((rem_holes_plus, -rem_holes_plus))
for rem_xy in rem_holes:
xy = xy[(xy != rem_xy).any(axis=1), ]
return xy
def l3_shift_perturbed_defect(mirror_dims: List[int],
perturbed_radius: float=1.1,
shifts_a: List[float]=(),
shifts_r: List[float]=()
) -> numpy.ndarray:
"""
3-hole defect with perturbed hole sizes intended to form an upwards-directed
beam. Can also include shifted holes along the defect line, intended
to give the mode a more gaussian profile to improve Q.
:param mirror_dims: [x, y] mirror lengths (number of holes). Total number of holes
is 2 * n + 1 in each direction.
:param perturbed_radius: Amount to perturb the radius of the holes used for beam-forming
:param shifts_a: Percentage of a to shift (1st, 2nd, 3rd,...) holes along the defect line
:param shifts_r: Factor to multiply the radius by. Should match length of shifts_a
:return: [[x0, y0, r0], [x1, y1, r1], ...] for all the holes
"""
xyr = ln_shift_defect(mirror_dims, 3, shifts_a, shifts_r)
abs_x, abs_y = (numpy.fabs(xyr[:, i]) for i in (0, 1))
# Sorted unique xs and ys
# Ignore row y=0 because it might have shifted holes
xs = numpy.unique(abs_x[abs_x != 0])
ys = numpy.unique(abs_y)
# which holes should be perturbed? (xs[[3, 7]], ys[1]) and (xs[[2, 6]], ys[2])
perturbed_holes = ((xs[a], ys[b]) for a, b in ((3, 1), (7, 1), (2, 2), (6, 2)))
for row in xyr:
if numpy.fabs(row) in perturbed_holes:
row[2] = perturbed_radius
return xyr

93
pyproject.toml Normal file

@ -0,0 +1,93 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "opencl_fdtd"
description = "OpenCL FDTD solver"
readme = "README.md"
license = { file = "LICENSE.md" }
authors = [
{ name="Jan Petykiewicz", email="jan@mpxd.net" },
]
homepage = "https://mpxd.net/code/jan/opencl_fdtd"
repository = "https://mpxd.net/code/jan/opencl_fdtd"
keywords = [
"FDTD",
"finite",
"difference",
"time",
"domain",
"simulation",
"optics",
"electromagnetic",
"dielectric",
]
classifiers = [
"Programming Language :: Python :: 3",
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Manufacturing",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: GNU Affero General Public License v3",
"Topic :: Scientific/Engineering",
]
requires-python = ">=3.11"
dynamic = ["version"]
dependencies = [
"numpy>=1.26",
"pyopencl",
"jinja2",
"meanas>=0.3",
]
[tool.hatch.version]
path = "opencl_fdtd/__init__.py"
[tool.ruff]
exclude = [
".git",
"dist",
]
line-length = 145
indent-width = 4
lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
lint.select = [
"NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG",
"C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT",
"ARG", "PL", "R", "TRY",
"G010", "G101", "G201", "G202",
"Q002", "Q003", "Q004",
]
lint.ignore = [
#"ANN001", # No annotation
"ANN002", # *args
"ANN003", # **kwargs
"ANN401", # Any
"ANN101", # self: Self
"SIM108", # single-line if / else assignment
"RET504", # x=y+z; return x
"PIE790", # unnecessary pass
"ISC003", # non-implicit string concatenation
"C408", # dict(x=y) instead of {'x': y}
"PLR09", # Too many xxx
"PLR2004", # magic number
"PLC0414", # import x as x
"TRY003", # Long exception message
]
[[tool.mypy.overrides]]
module = [
"scipy",
"scipy.optimize",
"scipy.linalg",
"scipy.sparse",
"scipy.sparse.linalg",
"pyopencl",
"pyopencl.array",
"pyopencl.elementwise",
"pyopencl.reduction",
]
ignore_missing_imports = true

setup.py

@ -1,30 +0,0 @@
#!/usr/bin/env python3
from setuptools import setup, find_packages
import opencl_fdtd
with open('README.md', 'r') as f:
long_description = f.read()
setup(name='opencl_fdtd',
version=opencl_fdtd.version,
description='OpenCL FDTD solver',
long_description=long_description,
long_description_content_type='text/markdown',
author='Jan Petykiewicz',
author_email='anewusername@gmail.com',
url='https://mpxd.net/code/jan/opencl_fdtd',
packages=find_packages(),
package_data={
'opencl_fdfd': ['kernels/*']
},
install_requires=[
'numpy',
'pyopencl',
'jinja2',
'fdfd_tools>=0.3',
],
extras_require={
},
)