You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

231 lines
6.9 KiB
Python

"""
Default FDFD solver
This file holds the default FDFD solver, which uses an E-field wave
operator implemented directly as OpenCL arithmetic (rather than as
a matrix).
"""
from typing import List
import time
import logging
import numpy
from numpy.linalg import norm
import pyopencl
import pyopencl.array
import fdfd_tools.operators
from . import ops
__author__ = 'Jan Petykiewicz'
logger = logging.getLogger(__name__)
def cg_solver(omega: complex,
dxes: List[List[numpy.ndarray]],
J: numpy.ndarray,
epsilon: numpy.ndarray,
mu: numpy.ndarray = None,
pec: numpy.ndarray = None,
pmc: numpy.ndarray = None,
adjoint: bool = False,
max_iters: int = 40000,
err_threshold: float = 1e-6,
context: pyopencl.Context = None,
) -> numpy.ndarray:
"""
OpenCL FDFD solver using the iterative conjugate gradient (cg) method
and implementing the diagonalized E-field wave operator directly in
OpenCL.
All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays,
either use fdfd_tools.vec() or numpy:
f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z])))
:param omega: Complex frequency to solve at.
:param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)
:param J: Electric current distribution (at E-field locations)
:param epsilon: Dielectric constant distribution (at E-field locations)
:param mu: Magnetic permeability distribution (at H-field locations)
:param pec: Perfect electric conductor distribution
(at E-field locations; non-zero value indicates PEC is present)
:param pmc: Perfect magnetic conductor distribution
(at H-field locations; non-zero value indicates PMC is present)
:param adjoint: If true, solves the adjoint problem.
:param max_iters: Maximum number of iterations. Default 40,000.
:param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success.
Default 1e-6.
:param context: PyOpenCL context to run in. If not given, construct a new context.
:return: E-field which solves the system. Returned even if we did not converge.
"""
start_time = time.perf_counter()
b = -1j * omega * J
shape = [d.size for d in dxes[0]]
'''
** In this comment, I use the following notation:
M* = conj(M),
M.T = transpose(M),
M' = ctranspose(M),
M N = dot(M, N)
This solver uses a symmetrized wave operator M = (L A R) = (L A R).T
(where L = inv(R) are diagonal preconditioner matrices) when
solving the wave equation; therefore, it solves the problem
M y = d
=> (L A R) (inv(R) x) = (L b)
=> A x = b
with x = R y
From the fact that M is symmetric, we can write
(L A R)* = M* = M' = (L A R)' = R' A' L' = R* A' L*
We obtain M* by conjugating all of our arguments (except J).
Then we solve
(R* A' L*) v = (R* b)
and obtain x:
x = L* v
We can accomplish all this simply by conjugating everything (except J) and
reversing the order of L and R
'''
if adjoint:
# Conjugate everything
dxes = [[numpy.conj(d) for d in dd] for dd in dxes]
omega = numpy.conj(omega)
epsilon = numpy.conj(epsilon)
if mu is not None:
mu = numpy.conj(mu)
L, R = fdfd_tools.operators.e_full_preconditioners(dxes)
if adjoint:
b_preconditioned = R @ b
else:
b_preconditioned = L @ b
'''
Allocate GPU memory and load in data
'''
if context is None:
context = pyopencl.create_some_context(interactive=True)
queue = pyopencl.CommandQueue(context)
def load_field(v, dtype=numpy.complex128):
return pyopencl.array.to_device(queue, v.astype(dtype))
r = load_field(b_preconditioned) # load preconditioned b into r
H = pyopencl.array.zeros_like(r)
x = pyopencl.array.zeros_like(r)
v = pyopencl.array.zeros_like(r)
p = pyopencl.array.zeros_like(r)
alpha = 1.0 + 0j
rho = 1.0 + 0j
errs = []
inv_dxes = [[load_field(1 / d) for d in dd] for dd in dxes]
oeps = load_field(-omega ** 2 * epsilon)
Pl = load_field(L.diagonal())
Pr = load_field(R.diagonal())
if mu is None:
invm = load_field(numpy.array([]))
else:
invm = load_field(1 / mu)
if pec is None:
gpec = load_field(numpy.array([]), dtype=numpy.int8)
else:
gpec = load_field(pec.astype(bool), dtype=numpy.int8)
if pmc is None:
gpmc = load_field(numpy.array([]), dtype=numpy.int8)
else:
gpmc = load_field(pmc.astype(bool), dtype=numpy.int8)
'''
Generate OpenCL kernels
'''
has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)]
a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc)
xr_step = ops.create_xr_step(context)
rhoerr_step = ops.create_rhoerr_step(context)
p_step = ops.create_p_step(context)
dot = ops.create_dot(context)
def a_step(E, H, p, events):
return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events)
'''
Start the solve
'''
start_time2 = time.perf_counter()
_, err2 = rhoerr_step(r, [])
b_norm = numpy.sqrt(err2)
logging.debug('b_norm check: {}'.format(b_norm))
success = False
for k in range(max_iters):
do_print = (k % 100 == 0)
if do_print:
logger.debug('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha))
rho_prev = rho
e = xr_step(x, p, r, v, alpha, [])
rho, err2 = rhoerr_step(r, e)
errs += [numpy.sqrt(err2) / b_norm]
if do_print:
logger.debug('err {}'.format(errs[-1]))
if errs[-1] < err_threshold:
success = True
break
e = p_step(p, r, rho/rho_prev, [])
e = a_step(v, H, p, e)
alpha = rho / dot(p, v, e)
if k % 1000 == 0:
logger.info('iteration {}'.format(k))
'''
Done solving
'''
time_elapsed = time.perf_counter() - start_time
# Undo preconditioners
if adjoint:
x = (Pl * x).get()
else:
x = (Pr * x).get()
if success:
logger.info('Solve success')
else:
logger.warning('Solve failure')
logger.info('{} iterations in {} sec: {} iterations/sec \
'.format(k, time_elapsed, k / time_elapsed))
logger.debug('final error {}'.format(errs[-1]))
logger.debug('overhead {} sec'.format(start_time2 - start_time))
A0 = fdfd_tools.operators.e_full(omega, dxes, epsilon, mu).tocsr()
if adjoint:
# Remember we conjugated all the contents of A earlier
A0 = A0.T
logger.info('Post-everything residual: {}'.format(norm(A0 @ x - b) / norm(b)))
return x