Documentation!

2016-08-04 17:43:01 -07:00 · 2016-08-04 17:43:01 -07:00 · 89caff471f
commit 89caff471f
parent 78d3e17d92
5 changed files with 308 additions and 23 deletions
--- a/README.md
+++ b/README.md
@ -1,3 +1,37 @@
 # opencl_fdfd
-OpenCL FDFD solver
+**opencl_fdfd** is a 3D Finite Difference Frequency Domain (FDFD)
 solver implemented in Python and OpenCL.
 **Capabilities**
 * Arbitrary distributions of the following:
    * Dielectric constant (epsilon)
    * Magnetic permeabilty (mu)
    * Perfect electric conductor (PEC)
    * Perfect magnetic conductor (PMC)
 * Variable-sized rectangular grids
    * Stretched-coordinate PMLs (complex cell sizes allowed)
 Currently, only periodic boundary conditions are included.
 PEC/PMC boundaries can be implemented by drawing PEC/PMC cells near the edges.
 Bloch boundary conditions are not included but wouldn't be very hard to add.
 The default solver (opencl_fdfd.cg_solver(...)) located in main.py implements
 the E-field wave operator directly (ie, as a list of OpenCL instructions
 rather than a matrix). Additionally, there is a slower (and slightly more
 versatile) sovler in csr.py which attempts to solve an arbitrary sparse
 matrix in compressed sparse row (CSR) format using the same conjugate gradient
 method as the default solver. The CSR solver is significantly slower, but can
 be very useful for testing alternative formulations of the FDFD wave equation.
 Currently, this solver only uses a single GPU or other OpenCL accelerator;
 generalization to multiple GPUs should be pretty straightforward
 (ie, just copy over edge values during the matrix multiplication step).
 **Dependencies:**
 * python 3 (written and tested with 3.5) 
 * numpy
 * pyopencl
 * jinja2
 * [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)
--- a/opencl_fdfd/init.py
+++ b/opencl_fdfd/init.py
@ -1 +1,42 @@
-from .main import cg_solver
+"""
 opencl_fdfd OpenCL 3D FDFD solver
 opencl_fdfd is a 3D Finite Difference Frequency Domain (FDFD) solver implemented in
  Python and OpenCL. Its capabilities include:
  - Arbitrary distributions of the following:
    - Dielectric constant (epsilon)
    - Magnetic permeabilty (mu)
    - Perfect electric conductor (PEC)
    - Perfect magnetic conductor (PMC)
  - Variable-sized rectangular grids
    - Stretched-coordinate PMLs (complex cell sizes allowed)
  Currently, only periodic boundary conditions are included.
  PEC/PMC boundaries can be implemented by drawing PEC/PMC cells near the edges.
  Bloch boundary conditions are not included but wouldn't be very hard to add.
  The default solver (opencl_fdfd.cg_solver(...)) located in main.py implements
   the E-field wave operator directly (ie, as a list of OpenCL instructions
   rather than a matrix). Additionally, there is a slower (and slightly more
   versatile) sovler in csr.py which attempts to solve an arbitrary sparse
   matrix in compressed sparse row (CSR) format using the same conjugate gradient
   method as the default solver. The CSR solver is significantly slower, but can
   be very useful for testing alternative formulations of the FDFD wave equation.
  Currently, this solver only uses a single GPU or other OpenCL accelerator; generalization
  to multiple GPUs should be pretty straightforward (ie, just copy over edge values during the
  matrix multiplication step).
  Dependencies:
    - fdfd_tools    ( https://mpxd.net/gogs/jan/fdfd_tools )
    - numpy
    - pyopencl
    - jinja2
 """
 from .main import cg_solver
 __author__ = 'Jan Petykiewicz'
--- a/opencl_fdfd/csr.py
+++ b/opencl_fdfd/csr.py
@ -1,27 +1,53 @@
 from typing import List, Dict, Any
 import time
 import numpy
 from numpy.linalg import norm
 import pyopencl
 import pyopencl.array
 import time
 import fdfd_tools.operators
 from . import ops
 class CSRMatrix(object):
    """
    Matrix stored in Compressed Sparse Row format, in GPU RAM.
    """
    row_ptr = None      # type: pyopencl.array.Array
    col_ind = None      # type: pyopencl.array.Array
    data = None         # type: pyopencl.array.Array
-    def __init__(self, queue, m):
+    def __init__(self,
                 queue: pyopencl.CommandQueue,
                 m: 'scipy.sparse.csr_matrix'):
        self.row_ptr = pyopencl.array.to_device(queue, m.indptr)
        self.col_ind = pyopencl.array.to_device(queue, m.indices)
        self.data = pyopencl.array.to_device(queue, m.data.astype(numpy.complex128))
-def cg(a, b, max_iters=10000, err_thresh=1e-6, context=None, queue=None, verbose=False):
+def cg(a: 'scipy.sparse.csr_matrix',
       b: numpy.ndarray,
       max_iters: int = 10000,
       err_threshold: float = 1e-6,
       context: pyopencl.Context = None,
       queue: pyopencl.CommandQueue = None,
       verbose: bool = False,
       ) -> numpy.ndarray:
    """
    General conjugate-gradient solver for sparse matrices, where A @ x = b.
    :param a: Matrix to solve (CSR format)
    :param b: Right-hand side vector (dense ndarray)
    :param max_iters: Maximum number of iterations
    :param err_threshold: Error threshold for successful solve, relative to norm(b)
    :param context: PyOpenCL context. Will be created if not given.
    :param queue: PyOpenCL command queue. Will be created if not given.
    :param verbose: Whether to print statistics to screen.
    :return: Solution vector x; returned even if solve doesn't converge.
    """
    start_time = time.perf_counter()
    if context is None:
@ -44,7 +70,6 @@ def cg(a, b, max_iters=10000, err_thresh=1e-6, context=None, queue=None, verbose
    m = CSRMatrix(queue, a)
    '''
    Generate OpenCL kernels
    '''
@ -77,7 +102,7 @@ def cg(a, b, max_iters=10000, err_thresh=1e-6, context=None, queue=None, verbose
        if verbose:
            print('err', errs[-1])
-        if errs[-1] < err_thresh:
+        if errs[-1] < err_threshold:
            success = True
            break
@ -108,7 +133,38 @@ def cg(a, b, max_iters=10000, err_thresh=1e-6, context=None, queue=None, verbose
    return x
-def cg_solver(omega, dxes, J, epsilon, mu=None, pec=None, pmc=None, adjoint=False, solver_opts=None):
+def cg_solver(omega: complex,
              dxes: List[List[numpy.ndarray]],
              J: numpy.ndarray,
              epsilon: numpy.ndarray,
              mu: numpy.ndarray = None,
              pec: numpy.ndarray = None,
              pmc: numpy.ndarray = None,
              adjoint: bool = False,
              solver_opts: Dict[str, Any] = None,
              ) -> numpy.ndarray:
    """
    Conjugate gradient FDFD solver using CSR sparse matrices, mainly for
     testing and development since it's much slower than the solver in main.py.
    All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays,
     either use fdfd_tools.vec() or numpy:
     f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z])))
    :param omega: Complex frequency to solve at.
    :param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)
    :param J: Electric current distribution (at E-field locations)
    :param epsilon: Dielectric constant distribution (at E-field locations)
    :param mu: Magnetic permeability distribution (at H-field locations)
    :param pec: Perfect electric conductor distribution
        (at E-field locations; non-zero value indicates PEC is present)
    :param pmc: Perfect magnetic conductor distribution
        (at H-field locations; non-zero value indicates PMC is present)
    :param adjoint: If true, solves the adjoint problem.
    :param solver_opts: Passed as kwargs to opencl_fdfd.csr.cg(**solver_opts)
    :return: E-field which solves the system.
    """
    if solver_opts is None:
        solver_opts = dict()
--- a/opencl_fdfd/main.py
+++ b/opencl_fdfd/main.py
@ -1,17 +1,58 @@
 from typing import List
 import time
 import numpy
 from numpy.linalg import norm
 import pyopencl
 import pyopencl.array
 import time
 import fdfd_tools.operators
 from . import ops
 __author__ = 'Jan Petykiewicz'
 def cg_solver(omega: complex,
              dxes: List[List[numpy.ndarray]],
              J: numpy.ndarray,
              epsilon: numpy.ndarray,
              mu: numpy.ndarray = None,
              pec: numpy.ndarray = None,
              pmc: numpy.ndarray = None,
              adjoint: bool = False,
              max_iters: int = 40000,
              err_threshold: float = 1e-6,
              context: pyopencl.Context = None,
              verbose: bool = False,
              ) -> numpy.ndarray:
    """
    OpenCL FDFD solver using the iterative conjugate gradient (cg) method
     and implementing the diagonalized E-field wave operator directly in
     OpenCL.
    All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays,
     either use fdfd_tools.vec() or numpy:
     f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z])))
    :param omega: Complex frequency to solve at.
    :param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)
    :param J: Electric current distribution (at E-field locations)
    :param epsilon: Dielectric constant distribution (at E-field locations)
    :param mu: Magnetic permeability distribution (at H-field locations)
    :param pec: Perfect electric conductor distribution
        (at E-field locations; non-zero value indicates PEC is present)
    :param pmc: Perfect magnetic conductor distribution
        (at H-field locations; non-zero value indicates PMC is present)
    :param adjoint: If true, solves the adjoint problem.
    :param max_iters: Maximum number of iterations. Default 40,000.
    :param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success.
        Default 1e-6.
    :param context: PyOpenCL context to run in. If not given, construct a new context.
    :param verbose: If True, print progress to stdout. Default False.
    :return: E-field which solves the system. Returned even if we did not converge.
    """
 def cg_solver(omega, dxes, J, epsilon, mu=None, pec=None, pmc=None, adjoint=False,
              max_iters=40000, err_thresh=1e-6, context=None, verbose=False):
    start_time = time.perf_counter()
    b = -1j * omega * J
@ -138,7 +179,7 @@ def cg_solver(omega, dxes, J, epsilon, mu=None, pec=None, pmc=None, adjoint=Fals
        if verbose:
            print('err', errs[-1])
-        if errs[-1] < err_thresh:
+        if errs[-1] < err_threshold:
            success = True
            break
--- a/opencl_fdfd/ops.py
+++ b/opencl_fdfd/ops.py
@ -1,3 +1,5 @@
 from typing import List, Callable
 import numpy
 import jinja2
@ -9,6 +11,9 @@ from pyopencl.reduction import ReductionKernel
 # Create jinja2 env on module load
 jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
 # Return type for the create_opname(...) functions
 operation = Callable[..., List[pyopencl.Event]]
 def type_to_C(float_type: numpy.float32 or numpy.float64) -> str:
    """
@ -28,9 +33,11 @@ def type_to_C(float_type: numpy.float32 or numpy.float64) -> str:
    return types[float_type]
 # Type names
 ctype = type_to_C(numpy.complex128)
 ctype_bare = 'cdouble'
 # Preamble for all OpenCL code
 preamble = '''
 #define PYOPENCL_DEFINE_CDOUBLE
 #include <pyopencl-complex.h>
@ -44,11 +51,43 @@ preamble = '''
 '''.format(ctype=ctype_bare)
-def ptrs(*args):
+def ptrs(*args: str) -> List[str]:
    return [ctype + ' *' + s for s in args]
-def create_a(context, shape, mu=False, pec=False, pmc=False):
+def create_a(context: pyopencl.Context,
             shape: numpy.ndarray,
             mu: bool = False,
             pec: bool = False,
             pmc: bool = False,
             ) -> operation:
    """
    Return a function which performs (A @ p), where A is the FDFD wave equation for E-field.
    The returned function has the signature
    spmv(E, H, p, idxes, oeps, inv_mu, pec, pmc, Pl, Pr, e)
    with arguments (all except e are of type pyopencl.array.Array (or contain it)):
     E      E-field (output)
     H      Temporary variable for holding intermediate H-field values on GPU (same size as E)
     p      p-vector (input vector)
     idxes  list holding [[1/dx_e, 1/dy_e, 1/dz_e],  [1/dx_h, 1/dy_h, 1/dz_h]] (complex cell widths)
     oeps   omega * epsilon
     inv_mu 1/mu
     pec    array of bytes; nonzero value indicates presence of PEC
     pmc    array of bytes; nonzero value indicates presence of PMC
     Pl     Left preconditioner (array containing diagonal entries only)
     Pr     Right preconditioner (array containing diagonal entries only)
     e      List of pyopencl.Event; execution will wait until these are finished.
     and returns a list of pyopencl.Event.
    :param context: PyOpenCL context
    :param shape: Dimensions of the E-field
    :param mu: False iff (mu == 1) everywhere
    :param pec: False iff no PEC anywhere
    :param pmc: False iff no PMC anywhere
    :return: Function for computing (A @ p)
    """
    common_source = jinja_env.get_template('common.cl').render(shape=shape)
@ -57,6 +96,9 @@ def create_a(context, shape, mu=False, pec=False, pmc=False):
    des = [ctype + ' *inv_de' + a for a in 'xyz']
    dhs = [ctype + ' *inv_dh' + a for a in 'xyz']
    '''
    Convert p to initial E (ie, apply right preconditioner and PEC)
    '''
    p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec)
    P2E_kernel = ElementwiseKernel(context,
                                   name='P2E',
@ -64,6 +106,9 @@ def create_a(context, shape, mu=False, pec=False, pmc=False):
                                   operation=p2e_source,
                                   arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg))
    '''
    Calculate intermediate H from intermediate E
    '''
    e2h_source = jinja_env.get_template('e2h.cl').render(mu=mu,
                                                         pmc=pmc,
                                                         common_cl=common_source)
@ -73,6 +118,9 @@ def create_a(context, shape, mu=False, pec=False, pmc=False):
                                   operation=e2h_source,
                                   arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des))
    '''
    Calculate final E (including left preconditioner)
    '''
    h2e_source = jinja_env.get_template('h2e.cl').render(pec=pec,
                                                         common_cl=common_source)
    H2E_kernel = ElementwiseKernel(context,
@ -90,7 +138,20 @@ def create_a(context, shape, mu=False, pec=False, pmc=False):
    return spmv
-def create_xr_step(context):
+def create_xr_step(context: pyopencl.Context) -> operation:
    """
    Return a function
     xr_update(x, p, r, v, alpha, e)
    which performs the operations
     x += alpha * p
     r -= alpha * v
    after waiting for all in the list e
    and returns a list of pyopencl.Event
    :param context: PyOpenCL context
    :return: Function for performing x and r updates
    """
    update_xr_source = '''
    x[i] = add(x[i], mul(alpha, p[i]));
    r[i] = sub(r[i], mul(alpha, v[i]));
@ -110,13 +171,28 @@ def create_xr_step(context):
    return xr_update
-def create_rhoerr_step(context):
+def create_rhoerr_step(context: pyopencl.Context) -> operation:
    """
    Return a function
     ri_update(r, e)
    which performs the operations
     rho = r * r.conj()
     err = r * r
    after waiting for all pyopencl.Event in the list e
    and returns a list of pyopencl.Event
    :param context: PyOpenCL context
    :return: Function for performing x and r updates
    """
    update_ri_source = '''
    (double3)(r[i].real * r[i].real, \
              r[i].real * r[i].imag, \
              r[i].imag * r[i].imag)
    '''
    # Use a vector type (double3) to make the reduction simpler
    ri_dtype = pyopencl.array.vec.double3
    ri_kernel = ReductionKernel(context,
@ -138,7 +214,19 @@ def create_rhoerr_step(context):
    return ri_update
-def create_p_step(context):
+def create_p_step(context: pyopencl.Context) -> operation:
    """
    Return a function
     p_update(p, r, beta, e)
    which performs the operation
     p = r + beta * p
    after waiting for all pyopencl.Event in the list e
    and returns a list of pyopencl.Event
    :param context: PyOpenCL context
    :return: Function for performing the p update
    """
    update_p_source = '''
    p[i] = add(r[i], mul(beta, p[i]));
    '''
@ -156,7 +244,16 @@ def create_p_step(context):
    return p_update
-def create_dot(context):
+def create_dot(context: pyopencl.Context) -> operation:
    """
    Return a function for performing the dot product
     p @ v
    with the signature
     dot(p, v, e) -> float
    :param context: PyOpenCL context
    :return: Function for performing the dot product
    """
    dot_dtype = numpy.complex128
    dot_kernel = ReductionKernel(context,
@ -168,14 +265,30 @@ def create_dot(context):
                                 reduce_expr='add(a, b)',
                                 arguments=ptrs('p', 'v'))
-    def ri_update(p, v, e):
+    def dot(p, v, e):
        g = dot_kernel(p, v, wait_for=e)
        return g.get()
-    return ri_update
+    return dot
-def create_a_csr(context):
+def create_a_csr(context: pyopencl.Context) -> operation:
    """
    Return a function for performing the operation
     (N @ v)
    where N is stored in CSR (compressed sparse row) format.
    The function signature is
     spmv(v_out, m, v_in, e)
    where m is an opencl_fdfd.csr.CSRMatrix
    and v_out, v_in are (dense) vectors (of type pyopencl.array.Array).
    The function waits on all the pyopencl.Event in e before running, and returns
     a list of pyopencl.Event.
    :param context: PyOpenCL context
    :return: Function for sparse (M @ v) operation where M is in CSR format
    """
    spmv_source = '''
    int start = m_row_ptr[i];
    int stop = m_row_ptr[i+1];