opencl_fdfd/opencl_fdfd/main.py

"""
Default FDFD solver

This file holds the default FDFD solver, which uses an E-field wave
operator implemented directly as OpenCL arithmetic (rather than as
a matrix).
"""

from typing import List
import time
import logging

import numpy
from numpy.linalg import norm
import pyopencl
import pyopencl.array

import fdfd_tools.operators

from . import ops


__author__ = 'Jan Petykiewicz'

logger = logging.getLogger(__name__)


def cg_solver(omega: complex,
              dxes: List[List[numpy.ndarray]],
              J: numpy.ndarray,
              epsilon: numpy.ndarray,
              mu: numpy.ndarray = None,
              pec: numpy.ndarray = None,
              pmc: numpy.ndarray = None,
              adjoint: bool = False,
              max_iters: int = 40000,
              err_threshold: float = 1e-6,
              context: pyopencl.Context = None,
              ) -> numpy.ndarray:
    """
    OpenCL FDFD solver using the iterative conjugate gradient (cg) method
     and implementing the diagonalized E-field wave operator directly in
     OpenCL.

    All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays,
     either use fdfd_tools.vec() or numpy:
     f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z])))

    :param omega: Complex frequency to solve at.
    :param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)
    :param J: Electric current distribution (at E-field locations)
    :param epsilon: Dielectric constant distribution (at E-field locations)
    :param mu: Magnetic permeability distribution (at H-field locations)
    :param pec: Perfect electric conductor distribution
        (at E-field locations; non-zero value indicates PEC is present)
    :param pmc: Perfect magnetic conductor distribution
        (at H-field locations; non-zero value indicates PMC is present)
    :param adjoint: If true, solves the adjoint problem.
    :param max_iters: Maximum number of iterations. Default 40,000.
    :param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success.
        Default 1e-6.
    :param context: PyOpenCL context to run in. If not given, construct a new context.
    :return: E-field which solves the system. Returned even if we did not converge.
    """

    start_time = time.perf_counter()

    b = -1j * omega * J

    shape = [d.size for d in dxes[0]]

    '''
        ** In this comment, I use the following notation:
           M* = conj(M),
           M.T = transpose(M),
           M' = ctranspose(M),
           M N = dot(M, N)

        This solver uses a symmetrized wave operator M = (L A R) = (L A R).T
         (where L = inv(R) are diagonal preconditioner matrices) when
         solving the wave equation; therefore, it solves the problem
            M y = d
         => (L A R) (inv(R) x) = (L b)
         => A x = b
         with x = R y

        From the fact that M is symmetric, we can write
         (L A R)* = M* = M' = (L A R)' = R' A' L' = R* A' L*
        We obtain M* by conjugating all of our arguments (except J).

        Then we solve
         (R* A' L*) v = (R* b)
        and obtain x:
          x = L* v

        We can accomplish all this simply by conjugating everything (except J) and
         reversing the order of L and R
    '''
    if adjoint:
        # Conjugate everything
        dxes = [[numpy.conj(d) for d in dd] for dd in dxes]
        omega = numpy.conj(omega)
        epsilon = numpy.conj(epsilon)
        if mu is not None:
            mu = numpy.conj(mu)

    L, R = fdfd_tools.operators.e_full_preconditioners(dxes)

    if adjoint:
        b_preconditioned = R @ b
    else:
        b_preconditioned = L @ b

    '''
        Allocate GPU memory and load in data
    '''
    if context is None:
        context = pyopencl.create_some_context(interactive=True)

    queue = pyopencl.CommandQueue(context)

    def load_field(v, dtype=numpy.complex128):
        return pyopencl.array.to_device(queue, v.astype(dtype))

    r = load_field(b_preconditioned)  # load preconditioned b into r
    H = pyopencl.array.zeros_like(r)
    x = pyopencl.array.zeros_like(r)
    v = pyopencl.array.zeros_like(r)
    p = pyopencl.array.zeros_like(r)

    alpha = 1.0 + 0j
    rho = 1.0 + 0j
    errs = []

    inv_dxes = [[load_field(1 / d) for d in dd] for dd in dxes]
    oeps = load_field(-omega ** 2 * epsilon)
    Pl = load_field(L.diagonal())
    Pr = load_field(R.diagonal())

    if mu is None:
        invm = load_field(numpy.array([]))
    else:
        invm = load_field(1 / mu)

    if pec is None:
        gpec = load_field(numpy.array([]), dtype=numpy.int8)
    else:
        gpec = load_field(pec.astype(bool), dtype=numpy.int8)

    if pmc is None:
        gpmc = load_field(numpy.array([]), dtype=numpy.int8)
    else:
        gpmc = load_field(pmc.astype(bool), dtype=numpy.int8)

    '''
    Generate OpenCL kernels
    '''
    has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)]

    a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc)
    xr_step = ops.create_xr_step(context)
    rhoerr_step = ops.create_rhoerr_step(context)
    p_step = ops.create_p_step(context)
    dot = ops.create_dot(context)

    def a_step(E, H, p, events):
        return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events)

    '''
    Start the solve
    '''
    start_time2 = time.perf_counter()

    _, err2 = rhoerr_step(r, [])
    b_norm = numpy.sqrt(err2)
    logging.debug('b_norm check: {}'.format(b_norm))

    success = False
    for k in range(max_iters):
        do_print = (k % 100 == 0)
        if do_print:
            logger.debug('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha))

        rho_prev = rho
        e = xr_step(x, p, r, v, alpha, [])
        rho, err2 = rhoerr_step(r, e)

        errs += [numpy.sqrt(err2) / b_norm]

        if do_print:
            logger.debug('err {}'.format(errs[-1]))

        if errs[-1] < err_threshold:
            success = True
            break

        e = p_step(p, r, rho/rho_prev, [])
        e = a_step(v, H, p, e)
        alpha = rho / dot(p, v, e)

        if k % 1000 == 0:
            logger.info('iteration {}'.format(k))

    '''
    Done solving
    '''
    time_elapsed = time.perf_counter() - start_time

    # Undo preconditioners
    if adjoint:
        x = (Pl * x).get()
    else:
        x = (Pr * x).get()

    if success:
        logger.info('Solve success')
    else:
        logger.warning('Solve failure')
    logger.info('{} iterations in {} sec: {} iterations/sec \
                  '.format(k, time_elapsed, k / time_elapsed))
    logger.debug('final error {}'.format(errs[-1]))
    logger.debug('overhead {} sec'.format(start_time2 - start_time))

    A0 = fdfd_tools.operators.e_full(omega, dxes, epsilon, mu).tocsr()
    if adjoint:
        # Remember we conjugated all the contents of A earlier
        A0 = A0.T
    logger.info('Post-everything residual: {}'.format(norm(A0 @ x - b) / norm(b)))
    return x
Cleanup and comment 8 years ago			`"""`
			`Default FDFD solver`

			`This file holds the default FDFD solver, which uses an E-field wave`
			`operator implemented directly as OpenCL arithmetic (rather than as`
			`a matrix).`
			`"""`

Documentation! 8 years ago			`from typing import List`
			`import time`
use logging package for output, and remove 'verbose' options 7 years ago			`import logging`
Documentation! 8 years ago
initial code commit 8 years ago			`import numpy`
			`from numpy.linalg import norm`
			`import pyopencl`
			`import pyopencl.array`

			`import fdfd_tools.operators`

split up into multiple files 8 years ago			`from . import ops`
initial code commit 8 years ago
use logging package for output, and remove 'verbose' options 7 years ago
Documentation! 8 years ago			`__author__ = 'Jan Petykiewicz'`

use logging package for output, and remove 'verbose' options 7 years ago			`logger = logging.getLogger(__name__)`

Documentation! 8 years ago
			`def cg_solver(omega: complex,`
			`dxes: List[List[numpy.ndarray]],`
			`J: numpy.ndarray,`
			`epsilon: numpy.ndarray,`
			`mu: numpy.ndarray = None,`
			`pec: numpy.ndarray = None,`
			`pmc: numpy.ndarray = None,`
			`adjoint: bool = False,`
			`max_iters: int = 40000,`
			`err_threshold: float = 1e-6,`
			`context: pyopencl.Context = None,`
			`) -> numpy.ndarray:`
			`"""`
			`OpenCL FDFD solver using the iterative conjugate gradient (cg) method`
			`and implementing the diagonalized E-field wave operator directly in`
			`OpenCL.`

			`All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays,`
			`either use fdfd_tools.vec() or numpy:`
			`f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z])))`

			`:param omega: Complex frequency to solve at.`
			`:param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)`
			`:param J: Electric current distribution (at E-field locations)`
			`:param epsilon: Dielectric constant distribution (at E-field locations)`
			`:param mu: Magnetic permeability distribution (at H-field locations)`
			`:param pec: Perfect electric conductor distribution`
			`(at E-field locations; non-zero value indicates PEC is present)`
			`:param pmc: Perfect magnetic conductor distribution`
			`(at H-field locations; non-zero value indicates PMC is present)`
			`:param adjoint: If true, solves the adjoint problem.`
			`:param max_iters: Maximum number of iterations. Default 40,000.`
			`:param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success.`
			`Default 1e-6.`
			`:param context: PyOpenCL context to run in. If not given, construct a new context.`
			`:return: E-field which solves the system. Returned even if we did not converge.`
			`"""`
initial code commit 8 years ago
			`start_time = time.perf_counter()`

			`b = -1j * omega * J`

			`shape = [d.size for d in dxes[0]]`

			`'''`
clean up comment 8 years ago			`** In this comment, I use the following notation:`
			`M* = conj(M),`
			`M.T = transpose(M),`
			`M' = ctranspose(M),`
			`M N = dot(M, N)`
initial code commit 8 years ago
			`This solver uses a symmetrized wave operator M = (L A R) = (L A R).T`
			`(where L = inv(R) are diagonal preconditioner matrices) when`
			`solving the wave equation; therefore, it solves the problem`
			`M y = d`
			`=> (L A R) (inv(R) x) = (L b)`
			`=> A x = b`
			`with x = R y`

			`From the fact that M is symmetric, we can write`
			`(L A R)* = M* = M' = (L A R)' = R' A' L' = R* A' L*`
			`We obtain M* by conjugating all of our arguments (except J).`

			`Then we solve`
			`(R* A' L) v = (R b)`
			`and obtain x:`
			`x = L* v`

			`We can accomplish all this simply by conjugating everything (except J) and`
			`reversing the order of L and R`
			`'''`
			`if adjoint:`
			`# Conjugate everything`
			`dxes = [[numpy.conj(d) for d in dd] for dd in dxes]`
			`omega = numpy.conj(omega)`
			`epsilon = numpy.conj(epsilon)`
			`if mu is not None:`
			`mu = numpy.conj(mu)`

			`L, R = fdfd_tools.operators.e_full_preconditioners(dxes)`

			`if adjoint:`
			`b_preconditioned = R @ b`
			`else:`
			`b_preconditioned = L @ b`

			`'''`
			`Allocate GPU memory and load in data`
			`'''`
			`if context is None:`
consolidate boundary conditions in common.cl; add some comments and minor cleanup 7 years ago			`context = pyopencl.create_some_context(interactive=True)`
initial code commit 8 years ago
			`queue = pyopencl.CommandQueue(context)`

			`def load_field(v, dtype=numpy.complex128):`
			`return pyopencl.array.to_device(queue, v.astype(dtype))`

			`r = load_field(b_preconditioned) # load preconditioned b into r`
			`H = pyopencl.array.zeros_like(r)`
			`x = pyopencl.array.zeros_like(r)`
			`v = pyopencl.array.zeros_like(r)`
			`p = pyopencl.array.zeros_like(r)`

			`alpha = 1.0 + 0j`
			`rho = 1.0 + 0j`
			`errs = []`

			`inv_dxes = [[load_field(1 / d) for d in dd] for dd in dxes]`
			`oeps = load_field(-omega ** 2 * epsilon)`
			`Pl = load_field(L.diagonal())`
			`Pr = load_field(R.diagonal())`

			`if mu is None:`
			`invm = load_field(numpy.array([]))`
			`else:`
			`invm = load_field(1 / mu)`

			`if pec is None:`
split up into multiple files 8 years ago			`gpec = load_field(numpy.array([]), dtype=numpy.int8)`
initial code commit 8 years ago			`else:`
Conductor partial fixup 8 years ago			`gpec = load_field(pec.astype(bool), dtype=numpy.int8)`
initial code commit 8 years ago
			`if pmc is None:`
split up into multiple files 8 years ago			`gpmc = load_field(numpy.array([]), dtype=numpy.int8)`
initial code commit 8 years ago			`else:`
Conductor partial fixup 8 years ago			`gpmc = load_field(pmc.astype(bool), dtype=numpy.int8)`
initial code commit 8 years ago
			`'''`
			`Generate OpenCL kernels`
			`'''`
			`has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)]`

split up into multiple files 8 years ago			`a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc)`
			`xr_step = ops.create_xr_step(context)`
			`rhoerr_step = ops.create_rhoerr_step(context)`
			`p_step = ops.create_p_step(context)`
			`dot = ops.create_dot(context)`
initial code commit 8 years ago
			`def a_step(E, H, p, events):`
			`return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events)`

			`'''`
			`Start the solve`
			`'''`
			`start_time2 = time.perf_counter()`

			`_, err2 = rhoerr_step(r, [])`
			`b_norm = numpy.sqrt(err2)`
use logging package for output, and remove 'verbose' options 7 years ago			`logging.debug('b_norm check: {}'.format(b_norm))`
initial code commit 8 years ago
			`success = False`
			`for k in range(max_iters):`
use logging package for output, and remove 'verbose' options 7 years ago			`do_print = (k % 100 == 0)`
			`if do_print:`
			`logger.debug('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha))`
initial code commit 8 years ago
			`rho_prev = rho`
			`e = xr_step(x, p, r, v, alpha, [])`
			`rho, err2 = rhoerr_step(r, e)`

			`errs += [numpy.sqrt(err2) / b_norm]`

use logging package for output, and remove 'verbose' options 7 years ago			`if do_print:`
			`logger.debug('err {}'.format(errs[-1]))`
initial code commit 8 years ago
Documentation! 8 years ago			`if errs[-1] < err_threshold:`
initial code commit 8 years ago			`success = True`
			`break`

			`e = p_step(p, r, rho/rho_prev, [])`
			`e = a_step(v, H, p, e)`
			`alpha = rho / dot(p, v, e)`

			`if k % 1000 == 0:`
use logging package for output, and remove 'verbose' options 7 years ago			`logger.info('iteration {}'.format(k))`
initial code commit 8 years ago
			`'''`
			`Done solving`
			`'''`
			`time_elapsed = time.perf_counter() - start_time`

			`# Undo preconditioners`
			`if adjoint:`
			`x = (Pl * x).get()`
			`else:`
			`x = (Pr * x).get()`

			`if success:`
use logging package for output, and remove 'verbose' options 7 years ago			`logger.info('Solve success')`
initial code commit 8 years ago			`else:`
use logging package for output, and remove 'verbose' options 7 years ago			`logger.warning('Solve failure')`
			`logger.info('{} iterations in {} sec: {} iterations/sec \`
initial code commit 8 years ago			`'.format(k, time_elapsed, k / time_elapsed))`
use logging package for output, and remove 'verbose' options 7 years ago			`logger.debug('final error {}'.format(errs[-1]))`
			`logger.debug('overhead {} sec'.format(start_time2 - start_time))`
initial code commit 8 years ago
			`A0 = fdfd_tools.operators.e_full(omega, dxes, epsilon, mu).tocsr()`
			`if adjoint:`
			`# Remember we conjugated all the contents of A earlier`
			`A0 = A0.T`
use logging package for output, and remove 'verbose' options 7 years ago			`logger.info('Post-everything residual: {}'.format(norm(A0 @ x - b) / norm(b)))`
eol at eof 8 years ago			`return x`
Newline at eof 8 years ago