forked from jan/opencl_fdfd
239 lines
7.3 KiB
Python
239 lines
7.3 KiB
Python
"""
|
|
Default FDFD solver
|
|
|
|
This file holds the default FDFD solver, which uses an E-field wave
|
|
operator implemented directly as OpenCL arithmetic (rather than as
|
|
a matrix).
|
|
"""
|
|
import time
|
|
import logging
|
|
|
|
import numpy
|
|
from numpy.typing import NDArray, ArrayLike
|
|
from numpy.linalg import norm
|
|
from numpy import floating, complexfloating
|
|
import pyopencl
|
|
import pyopencl.array
|
|
|
|
import meanas.fdfd.operators
|
|
|
|
from . import ops
|
|
|
|
|
|
__author__ = 'Jan Petykiewicz'
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def cg_solver(
|
|
omega: complex,
|
|
dxes: list[list[NDArray[floating | complexfloating]]],
|
|
J: ArrayLike,
|
|
epsilon: ArrayLike,
|
|
mu: ArrayLike | None = None,
|
|
pec: ArrayLike | None = None,
|
|
pmc: ArrayLike | None = None,
|
|
adjoint: bool = False,
|
|
max_iters: int = 40000,
|
|
err_threshold: float = 1e-6,
|
|
context: pyopencl.Context | None = None,
|
|
) -> NDArray:
|
|
"""
|
|
OpenCL FDFD solver using the iterative conjugate gradient (cg) method
|
|
and implementing the diagonalized E-field wave operator directly in
|
|
OpenCL.
|
|
|
|
All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays,
|
|
either use meanas.fdmath.vec() or numpy:
|
|
f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z])))
|
|
|
|
Args:
|
|
omega: Complex frequency to solve at.
|
|
dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)
|
|
J: Electric current distribution (at E-field locations)
|
|
epsilon: Dielectric constant distribution (at E-field locations)
|
|
mu: Magnetic permeability distribution (at H-field locations)
|
|
pec: Perfect electric conductor distribution
|
|
(at E-field locations; non-zero value indicates PEC is present)
|
|
pmc: Perfect magnetic conductor distribution
|
|
(at H-field locations; non-zero value indicates PMC is present)
|
|
adjoint: If true, solves the adjoint problem.
|
|
max_iters: Maximum number of iterations. Default 40,000.
|
|
err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success.
|
|
Default 1e-6.
|
|
context: PyOpenCL context to run in. If not given, construct a new context.
|
|
|
|
Returns:
|
|
E-field which solves the system. Returned even if we did not converge.
|
|
"""
|
|
start_time = time.perf_counter()
|
|
|
|
shape = [dd.size for dd in dxes[0]]
|
|
|
|
b = -1j * omega * numpy.array(J, copy=False)
|
|
|
|
'''
|
|
** In this comment, I use the following notation:
|
|
M* = conj(M),
|
|
M.T = transpose(M),
|
|
M' = ctranspose(M),
|
|
M N = dot(M, N)
|
|
|
|
This solver uses a symmetrized wave operator M = (L A R) = (L A R).T
|
|
(where L = inv(R) are diagonal preconditioner matrices) when
|
|
solving the wave equation; therefore, it solves the problem
|
|
M y = d
|
|
=> (L A R) (inv(R) x) = (L b)
|
|
=> A x = b
|
|
with x = R y
|
|
|
|
From the fact that M is symmetric, we can write
|
|
(L A R)* = M* = M' = (L A R)' = R' A' L' = R* A' L*
|
|
We obtain M* by conjugating all of our arguments (except J).
|
|
|
|
Then we solve
|
|
(R* A' L*) v = (R* b)
|
|
and obtain x:
|
|
x = L* v
|
|
|
|
We can accomplish all this simply by conjugating everything (except J) and
|
|
reversing the order of L and R
|
|
'''
|
|
epsilon = numpy.array(epsilon, copy=False)
|
|
if adjoint:
|
|
# Conjugate everything
|
|
dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes]
|
|
omega = numpy.conj(omega)
|
|
epsilon = numpy.conj(epsilon)
|
|
if mu is not None:
|
|
mu = numpy.conj(mu)
|
|
assert isinstance(epsilon, NDArray[floating] | NDArray[complexfloating])
|
|
|
|
L, R = meanas.fdfd.operators.e_full_preconditioners(dxes)
|
|
b_preconditioned = (R if adjoint else L) @ b
|
|
|
|
'''
|
|
Allocate GPU memory and load in data
|
|
'''
|
|
if context is None:
|
|
context = pyopencl.create_some_context(interactive=True)
|
|
|
|
queue = pyopencl.CommandQueue(context)
|
|
|
|
def load_field(v: NDArray[complexfloating | floating], dtype: type = numpy.complex128) -> pyopencl.array.Array:
|
|
return pyopencl.array.to_device(queue, v.astype(dtype))
|
|
|
|
r = load_field(b_preconditioned) # load preconditioned b into r
|
|
H = pyopencl.array.zeros_like(r)
|
|
x = pyopencl.array.zeros_like(r)
|
|
v = pyopencl.array.zeros_like(r)
|
|
p = pyopencl.array.zeros_like(r)
|
|
|
|
alpha = 1.0 + 0j
|
|
rho = 1.0 + 0j
|
|
errs = []
|
|
|
|
inv_dxes = [[load_field(1 / numpy.array(dd, copy=False)) for dd in dds] for dds in dxes]
|
|
oeps = load_field(-omega ** 2 * epsilon)
|
|
Pl = load_field(L.diagonal())
|
|
Pr = load_field(R.diagonal())
|
|
|
|
if mu is None:
|
|
invm = load_field(numpy.array([]))
|
|
else:
|
|
invm = load_field(1 / numpy.array(mu, copy=False))
|
|
mu = numpy.array(mu, copy=False)
|
|
|
|
if pec is None:
|
|
gpec = load_field(numpy.array([]), dtype=numpy.int8)
|
|
else:
|
|
gpec = load_field(numpy.array(pec, dtype=bool, copy=False), dtype=numpy.int8)
|
|
|
|
if pmc is None:
|
|
gpmc = load_field(numpy.array([]), dtype=numpy.int8)
|
|
else:
|
|
gpmc = load_field(numpy.array(pmc, dtype=bool, copy=False), dtype=numpy.int8)
|
|
|
|
'''
|
|
Generate OpenCL kernels
|
|
'''
|
|
has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)]
|
|
|
|
a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc)
|
|
xr_step = ops.create_xr_step(context)
|
|
rhoerr_step = ops.create_rhoerr_step(context)
|
|
p_step = ops.create_p_step(context)
|
|
dot = ops.create_dot(context)
|
|
|
|
def a_step(
|
|
E: pyopencl.array.Array,
|
|
H: pyopencl.array.Array,
|
|
p: pyopencl.array.Array,
|
|
events: list[pyopencl.Event],
|
|
) -> list[pyopencl.Event]:
|
|
return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events)
|
|
|
|
'''
|
|
Start the solve
|
|
'''
|
|
start_time2 = time.perf_counter()
|
|
|
|
_, err2 = rhoerr_step(r, [])
|
|
b_norm = numpy.sqrt(err2)
|
|
logging.debug(f'b_norm check: {b_norm}')
|
|
|
|
success = False
|
|
for k in range(max_iters):
|
|
do_print = (k % 100 == 0)
|
|
if do_print:
|
|
logger.debug(f'[{k:06d}] rho {rho:.4} alpha {alpha:4.4}')
|
|
|
|
rho_prev = rho
|
|
e = xr_step(x, p, r, v, alpha, [])
|
|
rho, err2 = rhoerr_step(r, e)
|
|
|
|
errs += [numpy.sqrt(err2) / b_norm]
|
|
|
|
if do_print:
|
|
logger.debug(f'err {errs[-1]}')
|
|
|
|
if errs[-1] < err_threshold:
|
|
success = True
|
|
break
|
|
|
|
e = p_step(p, r, rho/rho_prev, [])
|
|
e = a_step(v, H, p, e)
|
|
alpha = rho / dot(p, v, e)
|
|
|
|
if k % 1000 == 0:
|
|
logger.info(f'iteration {k}')
|
|
|
|
'''
|
|
Done solving
|
|
'''
|
|
time_elapsed = time.perf_counter() - start_time
|
|
|
|
# Undo preconditioners
|
|
if adjoint:
|
|
x = (Pl * x).get()
|
|
else:
|
|
x = (Pr * x).get()
|
|
|
|
if success:
|
|
logger.info('Solve success')
|
|
else:
|
|
logger.warning('Solve failure')
|
|
logger.info(f'{k} iterations in {time_elapsed} sec: {k / time_elapsed} iterations/sec')
|
|
logger.debug(f'final error {errs[-1]}')
|
|
logger.debug(f'overhead {start_time2 - start_time} sec')
|
|
|
|
A0 = meanas.fdfd.operators.e_full(omega, dxes, epsilon, mu).tocsr()
|
|
if adjoint:
|
|
# Remember we conjugated all the contents of A earlier
|
|
A0 = A0.T
|
|
|
|
residual = norm(A0 @ x - b) / norm(b)
|
|
logger.info(f'Post-everything residual: {residual}')
|
|
return x
|
|
|