Documentation!

release
jan 8 years ago
parent 78d3e17d92
commit 89caff471f

@ -1,3 +1,37 @@
# opencl_fdfd
OpenCL FDFD solver
**opencl_fdfd** is a 3D Finite Difference Frequency Domain (FDFD)
solver implemented in Python and OpenCL.
**Capabilities**
* Arbitrary distributions of the following:
* Dielectric constant (epsilon)
* Magnetic permeabilty (mu)
* Perfect electric conductor (PEC)
* Perfect magnetic conductor (PMC)
* Variable-sized rectangular grids
* Stretched-coordinate PMLs (complex cell sizes allowed)
Currently, only periodic boundary conditions are included.
PEC/PMC boundaries can be implemented by drawing PEC/PMC cells near the edges.
Bloch boundary conditions are not included but wouldn't be very hard to add.
The default solver (opencl_fdfd.cg_solver(...)) located in main.py implements
the E-field wave operator directly (ie, as a list of OpenCL instructions
rather than a matrix). Additionally, there is a slower (and slightly more
versatile) sovler in csr.py which attempts to solve an arbitrary sparse
matrix in compressed sparse row (CSR) format using the same conjugate gradient
method as the default solver. The CSR solver is significantly slower, but can
be very useful for testing alternative formulations of the FDFD wave equation.
Currently, this solver only uses a single GPU or other OpenCL accelerator;
generalization to multiple GPUs should be pretty straightforward
(ie, just copy over edge values during the matrix multiplication step).
**Dependencies:**
* python 3 (written and tested with 3.5)
* numpy
* pyopencl
* jinja2
* [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools)

@ -1 +1,42 @@
from .main import cg_solver
"""
opencl_fdfd OpenCL 3D FDFD solver
opencl_fdfd is a 3D Finite Difference Frequency Domain (FDFD) solver implemented in
Python and OpenCL. Its capabilities include:
- Arbitrary distributions of the following:
- Dielectric constant (epsilon)
- Magnetic permeabilty (mu)
- Perfect electric conductor (PEC)
- Perfect magnetic conductor (PMC)
- Variable-sized rectangular grids
- Stretched-coordinate PMLs (complex cell sizes allowed)
Currently, only periodic boundary conditions are included.
PEC/PMC boundaries can be implemented by drawing PEC/PMC cells near the edges.
Bloch boundary conditions are not included but wouldn't be very hard to add.
The default solver (opencl_fdfd.cg_solver(...)) located in main.py implements
the E-field wave operator directly (ie, as a list of OpenCL instructions
rather than a matrix). Additionally, there is a slower (and slightly more
versatile) sovler in csr.py which attempts to solve an arbitrary sparse
matrix in compressed sparse row (CSR) format using the same conjugate gradient
method as the default solver. The CSR solver is significantly slower, but can
be very useful for testing alternative formulations of the FDFD wave equation.
Currently, this solver only uses a single GPU or other OpenCL accelerator; generalization
to multiple GPUs should be pretty straightforward (ie, just copy over edge values during the
matrix multiplication step).
Dependencies:
- fdfd_tools ( https://mpxd.net/gogs/jan/fdfd_tools )
- numpy
- pyopencl
- jinja2
"""
from .main import cg_solver
__author__ = 'Jan Petykiewicz'

@ -1,27 +1,53 @@
from typing import List, Dict, Any
import time
import numpy
from numpy.linalg import norm
import pyopencl
import pyopencl.array
import time
import fdfd_tools.operators
from . import ops
class CSRMatrix(object):
"""
Matrix stored in Compressed Sparse Row format, in GPU RAM.
"""
row_ptr = None # type: pyopencl.array.Array
col_ind = None # type: pyopencl.array.Array
data = None # type: pyopencl.array.Array
def __init__(self, queue, m):
def __init__(self,
queue: pyopencl.CommandQueue,
m: 'scipy.sparse.csr_matrix'):
self.row_ptr = pyopencl.array.to_device(queue, m.indptr)
self.col_ind = pyopencl.array.to_device(queue, m.indices)
self.data = pyopencl.array.to_device(queue, m.data.astype(numpy.complex128))
def cg(a, b, max_iters=10000, err_thresh=1e-6, context=None, queue=None, verbose=False):
def cg(a: 'scipy.sparse.csr_matrix',
b: numpy.ndarray,
max_iters: int = 10000,
err_threshold: float = 1e-6,
context: pyopencl.Context = None,
queue: pyopencl.CommandQueue = None,
verbose: bool = False,
) -> numpy.ndarray:
"""
General conjugate-gradient solver for sparse matrices, where A @ x = b.
:param a: Matrix to solve (CSR format)
:param b: Right-hand side vector (dense ndarray)
:param max_iters: Maximum number of iterations
:param err_threshold: Error threshold for successful solve, relative to norm(b)
:param context: PyOpenCL context. Will be created if not given.
:param queue: PyOpenCL command queue. Will be created if not given.
:param verbose: Whether to print statistics to screen.
:return: Solution vector x; returned even if solve doesn't converge.
"""
start_time = time.perf_counter()
if context is None:
@ -44,7 +70,6 @@ def cg(a, b, max_iters=10000, err_thresh=1e-6, context=None, queue=None, verbose
m = CSRMatrix(queue, a)
'''
Generate OpenCL kernels
'''
@ -77,7 +102,7 @@ def cg(a, b, max_iters=10000, err_thresh=1e-6, context=None, queue=None, verbose
if verbose:
print('err', errs[-1])
if errs[-1] < err_thresh:
if errs[-1] < err_threshold:
success = True
break
@ -108,7 +133,38 @@ def cg(a, b, max_iters=10000, err_thresh=1e-6, context=None, queue=None, verbose
return x
def cg_solver(omega, dxes, J, epsilon, mu=None, pec=None, pmc=None, adjoint=False, solver_opts=None):
def cg_solver(omega: complex,
dxes: List[List[numpy.ndarray]],
J: numpy.ndarray,
epsilon: numpy.ndarray,
mu: numpy.ndarray = None,
pec: numpy.ndarray = None,
pmc: numpy.ndarray = None,
adjoint: bool = False,
solver_opts: Dict[str, Any] = None,
) -> numpy.ndarray:
"""
Conjugate gradient FDFD solver using CSR sparse matrices, mainly for
testing and development since it's much slower than the solver in main.py.
All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays,
either use fdfd_tools.vec() or numpy:
f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z])))
:param omega: Complex frequency to solve at.
:param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)
:param J: Electric current distribution (at E-field locations)
:param epsilon: Dielectric constant distribution (at E-field locations)
:param mu: Magnetic permeability distribution (at H-field locations)
:param pec: Perfect electric conductor distribution
(at E-field locations; non-zero value indicates PEC is present)
:param pmc: Perfect magnetic conductor distribution
(at H-field locations; non-zero value indicates PMC is present)
:param adjoint: If true, solves the adjoint problem.
:param solver_opts: Passed as kwargs to opencl_fdfd.csr.cg(**solver_opts)
:return: E-field which solves the system.
"""
if solver_opts is None:
solver_opts = dict()

@ -1,17 +1,58 @@
from typing import List
import time
import numpy
from numpy.linalg import norm
import pyopencl
import pyopencl.array
import time
import fdfd_tools.operators
from . import ops
__author__ = 'Jan Petykiewicz'
def cg_solver(omega: complex,
dxes: List[List[numpy.ndarray]],
J: numpy.ndarray,
epsilon: numpy.ndarray,
mu: numpy.ndarray = None,
pec: numpy.ndarray = None,
pmc: numpy.ndarray = None,
adjoint: bool = False,
max_iters: int = 40000,
err_threshold: float = 1e-6,
context: pyopencl.Context = None,
verbose: bool = False,
) -> numpy.ndarray:
"""
OpenCL FDFD solver using the iterative conjugate gradient (cg) method
and implementing the diagonalized E-field wave operator directly in
OpenCL.
All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays,
either use fdfd_tools.vec() or numpy:
f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z])))
:param omega: Complex frequency to solve at.
:param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes)
:param J: Electric current distribution (at E-field locations)
:param epsilon: Dielectric constant distribution (at E-field locations)
:param mu: Magnetic permeability distribution (at H-field locations)
:param pec: Perfect electric conductor distribution
(at E-field locations; non-zero value indicates PEC is present)
:param pmc: Perfect magnetic conductor distribution
(at H-field locations; non-zero value indicates PMC is present)
:param adjoint: If true, solves the adjoint problem.
:param max_iters: Maximum number of iterations. Default 40,000.
:param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success.
Default 1e-6.
:param context: PyOpenCL context to run in. If not given, construct a new context.
:param verbose: If True, print progress to stdout. Default False.
:return: E-field which solves the system. Returned even if we did not converge.
"""
def cg_solver(omega, dxes, J, epsilon, mu=None, pec=None, pmc=None, adjoint=False,
max_iters=40000, err_thresh=1e-6, context=None, verbose=False):
start_time = time.perf_counter()
b = -1j * omega * J
@ -138,7 +179,7 @@ def cg_solver(omega, dxes, J, epsilon, mu=None, pec=None, pmc=None, adjoint=Fals
if verbose:
print('err', errs[-1])
if errs[-1] < err_thresh:
if errs[-1] < err_threshold:
success = True
break

@ -1,3 +1,5 @@
from typing import List, Callable
import numpy
import jinja2
@ -9,6 +11,9 @@ from pyopencl.reduction import ReductionKernel
# Create jinja2 env on module load
jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
# Return type for the create_opname(...) functions
operation = Callable[..., List[pyopencl.Event]]
def type_to_C(float_type: numpy.float32 or numpy.float64) -> str:
"""
@ -28,9 +33,11 @@ def type_to_C(float_type: numpy.float32 or numpy.float64) -> str:
return types[float_type]
# Type names
ctype = type_to_C(numpy.complex128)
ctype_bare = 'cdouble'
# Preamble for all OpenCL code
preamble = '''
#define PYOPENCL_DEFINE_CDOUBLE
#include <pyopencl-complex.h>
@ -44,11 +51,43 @@ preamble = '''
'''.format(ctype=ctype_bare)
def ptrs(*args):
def ptrs(*args: str) -> List[str]:
return [ctype + ' *' + s for s in args]
def create_a(context, shape, mu=False, pec=False, pmc=False):
def create_a(context: pyopencl.Context,
shape: numpy.ndarray,
mu: bool = False,
pec: bool = False,
pmc: bool = False,
) -> operation:
"""
Return a function which performs (A @ p), where A is the FDFD wave equation for E-field.
The returned function has the signature
spmv(E, H, p, idxes, oeps, inv_mu, pec, pmc, Pl, Pr, e)
with arguments (all except e are of type pyopencl.array.Array (or contain it)):
E E-field (output)
H Temporary variable for holding intermediate H-field values on GPU (same size as E)
p p-vector (input vector)
idxes list holding [[1/dx_e, 1/dy_e, 1/dz_e], [1/dx_h, 1/dy_h, 1/dz_h]] (complex cell widths)
oeps omega * epsilon
inv_mu 1/mu
pec array of bytes; nonzero value indicates presence of PEC
pmc array of bytes; nonzero value indicates presence of PMC
Pl Left preconditioner (array containing diagonal entries only)
Pr Right preconditioner (array containing diagonal entries only)
e List of pyopencl.Event; execution will wait until these are finished.
and returns a list of pyopencl.Event.
:param context: PyOpenCL context
:param shape: Dimensions of the E-field
:param mu: False iff (mu == 1) everywhere
:param pec: False iff no PEC anywhere
:param pmc: False iff no PMC anywhere
:return: Function for computing (A @ p)
"""
common_source = jinja_env.get_template('common.cl').render(shape=shape)
@ -57,6 +96,9 @@ def create_a(context, shape, mu=False, pec=False, pmc=False):
des = [ctype + ' *inv_de' + a for a in 'xyz']
dhs = [ctype + ' *inv_dh' + a for a in 'xyz']
'''
Convert p to initial E (ie, apply right preconditioner and PEC)
'''
p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec)
P2E_kernel = ElementwiseKernel(context,
name='P2E',
@ -64,6 +106,9 @@ def create_a(context, shape, mu=False, pec=False, pmc=False):
operation=p2e_source,
arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg))
'''
Calculate intermediate H from intermediate E
'''
e2h_source = jinja_env.get_template('e2h.cl').render(mu=mu,
pmc=pmc,
common_cl=common_source)
@ -73,6 +118,9 @@ def create_a(context, shape, mu=False, pec=False, pmc=False):
operation=e2h_source,
arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des))
'''
Calculate final E (including left preconditioner)
'''
h2e_source = jinja_env.get_template('h2e.cl').render(pec=pec,
common_cl=common_source)
H2E_kernel = ElementwiseKernel(context,
@ -90,7 +138,20 @@ def create_a(context, shape, mu=False, pec=False, pmc=False):
return spmv
def create_xr_step(context):
def create_xr_step(context: pyopencl.Context) -> operation:
"""
Return a function
xr_update(x, p, r, v, alpha, e)
which performs the operations
x += alpha * p
r -= alpha * v
after waiting for all in the list e
and returns a list of pyopencl.Event
:param context: PyOpenCL context
:return: Function for performing x and r updates
"""
update_xr_source = '''
x[i] = add(x[i], mul(alpha, p[i]));
r[i] = sub(r[i], mul(alpha, v[i]));
@ -110,13 +171,28 @@ def create_xr_step(context):
return xr_update
def create_rhoerr_step(context):
def create_rhoerr_step(context: pyopencl.Context) -> operation:
"""
Return a function
ri_update(r, e)
which performs the operations
rho = r * r.conj()
err = r * r
after waiting for all pyopencl.Event in the list e
and returns a list of pyopencl.Event
:param context: PyOpenCL context
:return: Function for performing x and r updates
"""
update_ri_source = '''
(double3)(r[i].real * r[i].real, \
r[i].real * r[i].imag, \
r[i].imag * r[i].imag)
'''
# Use a vector type (double3) to make the reduction simpler
ri_dtype = pyopencl.array.vec.double3
ri_kernel = ReductionKernel(context,
@ -138,7 +214,19 @@ def create_rhoerr_step(context):
return ri_update
def create_p_step(context):
def create_p_step(context: pyopencl.Context) -> operation:
"""
Return a function
p_update(p, r, beta, e)
which performs the operation
p = r + beta * p
after waiting for all pyopencl.Event in the list e
and returns a list of pyopencl.Event
:param context: PyOpenCL context
:return: Function for performing the p update
"""
update_p_source = '''
p[i] = add(r[i], mul(beta, p[i]));
'''
@ -156,7 +244,16 @@ def create_p_step(context):
return p_update
def create_dot(context):
def create_dot(context: pyopencl.Context) -> operation:
"""
Return a function for performing the dot product
p @ v
with the signature
dot(p, v, e) -> float
:param context: PyOpenCL context
:return: Function for performing the dot product
"""
dot_dtype = numpy.complex128
dot_kernel = ReductionKernel(context,
@ -168,14 +265,30 @@ def create_dot(context):
reduce_expr='add(a, b)',
arguments=ptrs('p', 'v'))
def ri_update(p, v, e):
def dot(p, v, e):
g = dot_kernel(p, v, wait_for=e)
return g.get()
return ri_update
return dot
def create_a_csr(context):
def create_a_csr(context: pyopencl.Context) -> operation:
"""
Return a function for performing the operation
(N @ v)
where N is stored in CSR (compressed sparse row) format.
The function signature is
spmv(v_out, m, v_in, e)
where m is an opencl_fdfd.csr.CSRMatrix
and v_out, v_in are (dense) vectors (of type pyopencl.array.Array).
The function waits on all the pyopencl.Event in e before running, and returns
a list of pyopencl.Event.
:param context: PyOpenCL context
:return: Function for sparse (M @ v) operation where M is in CSR format
"""
spmv_source = '''
int start = m_row_ptr[i];
int stop = m_row_ptr[i+1];

Loading…
Cancel
Save