Compare commits

...

8 Commits

5 changed files with 158 additions and 104 deletions

View File

@ -37,7 +37,7 @@
- jinja2 - jinja2
""" """
from .main import cg_solver from .main import cg_solver as cg_solver
__author__ = 'Jan Petykiewicz' __author__ = 'Jan Petykiewicz'
__version__ = '0.4' __version__ = '0.4'

View File

@ -14,23 +14,23 @@ satisfy the constraints for the 'conjugate gradient' algorithm
(positive definite, symmetric) and some that don't. (positive definite, symmetric) and some that don't.
""" """
from typing import Dict, Any, Optional from typing import Any, TYPE_CHECKING
import time import time
import logging import logging
import numpy import numpy
from numpy.typing import NDArray, ArrayLike from numpy.typing import NDArray, ArrayLike
from numpy.linalg import norm from numpy.linalg import norm
from numpy import complexfloating
import pyopencl import pyopencl
import pyopencl.array import pyopencl.array
import scipy
import meanas.fdfd.solvers import meanas.fdfd.solvers
from . import ops from . import ops
if TYPE_CHECKING:
import scipy
__author__ = 'Jan Petykiewicz'
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -58,9 +58,9 @@ def cg(
b: ArrayLike, b: ArrayLike,
max_iters: int = 10000, max_iters: int = 10000,
err_threshold: float = 1e-6, err_threshold: float = 1e-6,
context: Optional[pyopencl.Context] = None, context: pyopencl.Context | None = None,
queue: Optional[pyopencl.CommandQueue] = None, queue: pyopencl.CommandQueue | None = None,
) -> NDArray: ) -> NDArray[complexfloating]:
""" """
General conjugate-gradient solver for sparse matrices, where A @ x = b. General conjugate-gradient solver for sparse matrices, where A @ x = b.
@ -84,10 +84,10 @@ def cg(
if queue is None: if queue is None:
queue = pyopencl.CommandQueue(context) queue = pyopencl.CommandQueue(context)
def load_field(v, dtype=numpy.complex128): def load_field(v: NDArray[numpy.complexfloating], dtype: type = numpy.complex128) -> pyopencl.array.Array:
return pyopencl.array.to_device(queue, v.astype(dtype)) return pyopencl.array.to_device(queue, v.astype(dtype))
r = load_field(b) r = load_field(numpy.asarray(b))
x = pyopencl.array.zeros_like(r) x = pyopencl.array.zeros_like(r)
v = pyopencl.array.zeros_like(r) v = pyopencl.array.zeros_like(r)
p = pyopencl.array.zeros_like(r) p = pyopencl.array.zeros_like(r)
@ -98,18 +98,18 @@ def cg(
m = CSRMatrix(queue, A) m = CSRMatrix(queue, A)
''' #
Generate OpenCL kernels # Generate OpenCL kernels
''' #
a_step = ops.create_a_csr(context) a_step = ops.create_a_csr(context)
xr_step = ops.create_xr_step(context) xr_step = ops.create_xr_step(context)
rhoerr_step = ops.create_rhoerr_step(context) rhoerr_step = ops.create_rhoerr_step(context)
p_step = ops.create_p_step(context) p_step = ops.create_p_step(context)
dot = ops.create_dot(context) dot = ops.create_dot(context)
''' #
Start the solve # Start the solve
''' #
start_time2 = time.perf_counter() start_time2 = time.perf_counter()
_, err2 = rhoerr_step(r, []) _, err2 = rhoerr_step(r, [])
@ -139,9 +139,9 @@ def cg(
if k % 1000 == 0: if k % 1000 == 0:
logger.info(f'iteration {k}') logger.info(f'iteration {k}')
''' #
Done solving # Done solving
''' #
time_elapsed = time.perf_counter() - start_time time_elapsed = time.perf_counter() - start_time
x = x.get() x = x.get()
@ -160,9 +160,9 @@ def cg(
def fdfd_cg_solver( def fdfd_cg_solver(
solver_opts: Optional[Dict[str, Any]] = None, solver_opts: dict[str, Any] | None = None,
**fdfd_args **fdfd_args,
) -> NDArray: ) -> NDArray[complexfloating]:
""" """
Conjugate gradient FDFD solver using CSR sparse matrices, mainly for Conjugate gradient FDFD solver using CSR sparse matrices, mainly for
testing and development since it's much slower than the solver in main.py. testing and development since it's much slower than the solver in main.py.

View File

@ -5,14 +5,13 @@ This file holds the default FDFD solver, which uses an E-field wave
operator implemented directly as OpenCL arithmetic (rather than as operator implemented directly as OpenCL arithmetic (rather than as
a matrix). a matrix).
""" """
from typing import List, Optional, cast
import time import time
import logging import logging
import numpy import numpy
from numpy.typing import NDArray, ArrayLike from numpy.typing import NDArray, ArrayLike
from numpy.linalg import norm from numpy.linalg import norm
from numpy import floating, complexfloating
import pyopencl import pyopencl
import pyopencl.array import pyopencl.array
@ -21,23 +20,21 @@ import meanas.fdfd.operators
from . import ops from . import ops
__author__ = 'Jan Petykiewicz'
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def cg_solver( def cg_solver(
omega: complex, omega: complex,
dxes: List[List[NDArray]], dxes: list[list[NDArray[floating | complexfloating]]],
J: ArrayLike, J: ArrayLike,
epsilon: ArrayLike, epsilon: ArrayLike,
mu: Optional[ArrayLike] = None, mu: ArrayLike | None = None,
pec: Optional[ArrayLike] = None, pec: ArrayLike | None = None,
pmc: Optional[ArrayLike] = None, pmc: ArrayLike | None = None,
adjoint: bool = False, adjoint: bool = False,
max_iters: int = 40000, max_iters: int = 40000,
err_threshold: float = 1e-6, err_threshold: float = 1e-6,
context: Optional[pyopencl.Context] = None, context: pyopencl.Context | None = None,
) -> NDArray: ) -> NDArray:
""" """
OpenCL FDFD solver using the iterative conjugate gradient (cg) method OpenCL FDFD solver using the iterative conjugate gradient (cg) method
@ -71,7 +68,7 @@ def cg_solver(
shape = [dd.size for dd in dxes[0]] shape = [dd.size for dd in dxes[0]]
b = -1j * omega * numpy.array(J, copy=False) b = -1j * omega * numpy.asarray(J)
''' '''
** In this comment, I use the following notation: ** In this comment, I use the following notation:
@ -100,7 +97,8 @@ def cg_solver(
We can accomplish all this simply by conjugating everything (except J) and We can accomplish all this simply by conjugating everything (except J) and
reversing the order of L and R reversing the order of L and R
''' '''
epsilon = numpy.array(epsilon, copy=False) epsilon = numpy.asarray(epsilon)
if adjoint: if adjoint:
# Conjugate everything # Conjugate everything
dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes] dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes]
@ -108,23 +106,20 @@ def cg_solver(
epsilon = numpy.conj(epsilon) epsilon = numpy.conj(epsilon)
if mu is not None: if mu is not None:
mu = numpy.conj(mu) mu = numpy.conj(mu)
assert isinstance(epsilon, NDArray[floating] | NDArray[complexfloating])
L, R = meanas.fdfd.operators.e_full_preconditioners(dxes) L, R = meanas.fdfd.operators.e_full_preconditioners(dxes)
b_preconditioned = (R if adjoint else L) @ b
if adjoint: #
b_preconditioned = R @ b # Allocate GPU memory and load in data
else: #
b_preconditioned = L @ b
'''
Allocate GPU memory and load in data
'''
if context is None: if context is None:
context = pyopencl.create_some_context(interactive=True) context = pyopencl.create_some_context(interactive=True)
queue = pyopencl.CommandQueue(context) queue = pyopencl.CommandQueue(context)
def load_field(v, dtype=numpy.complex128): def load_field(v: NDArray[complexfloating | floating], dtype: type = numpy.complex128) -> pyopencl.array.Array:
return pyopencl.array.to_device(queue, v.astype(dtype)) return pyopencl.array.to_device(queue, v.astype(dtype))
r = load_field(b_preconditioned) # load preconditioned b into r r = load_field(b_preconditioned) # load preconditioned b into r
@ -137,31 +132,31 @@ def cg_solver(
rho = 1.0 + 0j rho = 1.0 + 0j
errs = [] errs = []
inv_dxes = [[load_field(1 / numpy.array(dd, copy=False)) for dd in dds] for dds in dxes] inv_dxes = [[load_field(1 / numpy.asarray(dd)) for dd in dds] for dds in dxes]
oeps = load_field(-omega ** 2 * epsilon) oeps = load_field(-omega * omega * epsilon)
Pl = load_field(L.diagonal()) Pl = load_field(L.diagonal())
Pr = load_field(R.diagonal()) Pr = load_field(R.diagonal())
if mu is None: if mu is None:
invm = load_field(numpy.array([])) invm = load_field(numpy.array([]))
else: else:
invm = load_field(1 / numpy.array(mu, copy=False)) invm = load_field(1 / numpy.asarray(mu))
mu = numpy.array(mu, copy=False) mu = numpy.asarray(mu)
if pec is None: if pec is None:
gpec = load_field(numpy.array([]), dtype=numpy.int8) gpec = load_field(numpy.array([]), dtype=numpy.int8)
else: else:
gpec = load_field(numpy.array(pec, dtype=bool, copy=False), dtype=numpy.int8) gpec = load_field(numpy.asarray(pec, dtype=bool), dtype=numpy.int8)
if pmc is None: if pmc is None:
gpmc = load_field(numpy.array([]), dtype=numpy.int8) gpmc = load_field(numpy.array([]), dtype=numpy.int8)
else: else:
gpmc = load_field(numpy.array(pmc, dtype=bool, copy=False), dtype=numpy.int8) gpmc = load_field(numpy.asarray(pmc, dtype=bool), dtype=numpy.int8)
''' #
Generate OpenCL kernels # Generate OpenCL kernels
''' #
has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)] has_mu, has_pec, has_pmc = (qq is not None for qq in (mu, pec, pmc))
a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc) a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc)
xr_step = ops.create_xr_step(context) xr_step = ops.create_xr_step(context)
@ -169,12 +164,17 @@ def cg_solver(
p_step = ops.create_p_step(context) p_step = ops.create_p_step(context)
dot = ops.create_dot(context) dot = ops.create_dot(context)
def a_step(E, H, p, events): def a_step(
E: pyopencl.array.Array,
H: pyopencl.array.Array,
p: pyopencl.array.Array,
events: list[pyopencl.Event],
) -> list[pyopencl.Event]:
return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events) return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events)
''' #
Start the solve # Start the solve
''' #
start_time2 = time.perf_counter() start_time2 = time.perf_counter()
_, err2 = rhoerr_step(r, []) _, err2 = rhoerr_step(r, [])
@ -207,16 +207,13 @@ def cg_solver(
if k % 1000 == 0: if k % 1000 == 0:
logger.info(f'iteration {k}') logger.info(f'iteration {k}')
''' #
Done solving # Done solving
''' #
time_elapsed = time.perf_counter() - start_time time_elapsed = time.perf_counter() - start_time
# Undo preconditioners # Undo preconditioners
if adjoint: x = ((Pl if adjoint else Pr) * x).get()
x = (Pl * x).get()
else:
x = (Pr * x).get()
if success: if success:
logger.info('Solve success') logger.info('Solve success')

View File

@ -7,11 +7,11 @@ kernels for use by the other solvers.
See kernels/ for any of the .cl files loaded in this file. See kernels/ for any of the .cl files loaded in this file.
""" """
from typing import List, Callable, Union, Type, Sequence, Optional, Tuple from collections.abc import Callable, Sequence
import logging import logging
import numpy import numpy
from numpy.typing import NDArray, ArrayLike from numpy.typing import ArrayLike
import jinja2 import jinja2
import pyopencl import pyopencl
@ -20,17 +20,25 @@ from pyopencl.elementwise import ElementwiseKernel
from pyopencl.reduction import ReductionKernel from pyopencl.reduction import ReductionKernel
from .csr import CSRMatrix
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class FDFDError(Exception):
""" Custom error for opencl_fdfd """
pass
# Create jinja2 env on module load # Create jinja2 env on module load
jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
# Return type for the create_opname(...) functions # Return type for the create_opname(...) functions
operation = Callable[..., List[pyopencl.Event]] operation = Callable[..., list[pyopencl.Event]]
def type_to_C( def type_to_C(
float_type: Type, float_type: type[numpy.floating | numpy.complexfloating],
) -> str: ) -> str:
""" """
Returns a string corresponding to the C equivalent of a numpy type. Returns a string corresponding to the C equivalent of a numpy type.
@ -49,29 +57,30 @@ def type_to_C(
numpy.complex128: 'cdouble_t', numpy.complex128: 'cdouble_t',
} }
if float_type not in types: if float_type not in types:
raise Exception('Unsupported type') raise FDFDError('Unsupported type')
return types[float_type] return types[float_type]
# Type names # Type names
ctype = type_to_C(numpy.complex128) ctype = type_to_C(numpy.complex128)
ctype_bare = 'cdouble' ctype_bare = 'cdouble'
# Preamble for all OpenCL code # Preamble for all OpenCL code
preamble = ''' preamble = f'''
#define PYOPENCL_DEFINE_CDOUBLE #define PYOPENCL_DEFINE_CDOUBLE
#include <pyopencl-complex.h> #include <pyopencl-complex.h>
//Defines to clean up operation and type names //Defines to clean up operation and type names
#define ctype {ctype}_t #define ctype {ctype_bare}_t
#define zero {ctype}_new(0.0, 0.0) #define zero {ctype_bare}_new(0.0, 0.0)
#define add {ctype}_add #define add {ctype_bare}_add
#define sub {ctype}_sub #define sub {ctype_bare}_sub
#define mul {ctype}_mul #define mul {ctype_bare}_mul
'''.format(ctype=ctype_bare) '''
def ptrs(*args: str) -> List[str]: def ptrs(*args: str) -> list[str]:
return [ctype + ' *' + s for s in args] return [ctype + ' *' + s for s in args]
@ -120,9 +129,9 @@ def create_a(
des = [ctype + ' *inv_de' + a for a in 'xyz'] des = [ctype + ' *inv_de' + a for a in 'xyz']
dhs = [ctype + ' *inv_dh' + a for a in 'xyz'] dhs = [ctype + ' *inv_dh' + a for a in 'xyz']
''' #
Convert p to initial E (ie, apply right preconditioner and PEC) # Convert p to initial E (ie, apply right preconditioner and PEC)
''' #
p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec) p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec)
P2E_kernel = ElementwiseKernel( P2E_kernel = ElementwiseKernel(
context, context,
@ -132,9 +141,9 @@ def create_a(
arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg), arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg),
) )
''' #
Calculate intermediate H from intermediate E # Calculate intermediate H from intermediate E
''' #
e2h_source = jinja_env.get_template('e2h.cl').render( e2h_source = jinja_env.get_template('e2h.cl').render(
mu=mu, mu=mu,
pmc=pmc, pmc=pmc,
@ -148,9 +157,9 @@ def create_a(
arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des), arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des),
) )
''' #
Calculate final E (including left preconditioner) # Calculate final E (including left preconditioner)
''' #
h2e_source = jinja_env.get_template('h2e.cl').render( h2e_source = jinja_env.get_template('h2e.cl').render(
pec=pec, pec=pec,
common_cl=common_source, common_cl=common_source,
@ -169,13 +178,13 @@ def create_a(
p: pyopencl.array.Array, p: pyopencl.array.Array,
idxes: Sequence[Sequence[pyopencl.array.Array]], idxes: Sequence[Sequence[pyopencl.array.Array]],
oeps: pyopencl.array.Array, oeps: pyopencl.array.Array,
inv_mu: Optional[pyopencl.array.Array], inv_mu: pyopencl.array.Array | None,
pec: Optional[pyopencl.array.Array], pec: pyopencl.array.Array | None,
pmc: Optional[pyopencl.array.Array], pmc: pyopencl.array.Array | None,
Pl: pyopencl.array.Array, Pl: pyopencl.array.Array,
Pr: pyopencl.array.Array, Pr: pyopencl.array.Array,
e: List[pyopencl.Event], e: list[pyopencl.Event],
) -> List[pyopencl.Event]: ) -> list[pyopencl.Event]:
e2 = P2E_kernel(E, p, Pr, pec, wait_for=e) e2 = P2E_kernel(E, p, Pr, pec, wait_for=e)
e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2]) e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2])
e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2]) e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2])
@ -227,14 +236,14 @@ def create_xr_step(context: pyopencl.Context) -> operation:
r: pyopencl.array.Array, r: pyopencl.array.Array,
v: pyopencl.array.Array, v: pyopencl.array.Array,
alpha: complex, alpha: complex,
e: List[pyopencl.Event], e: list[pyopencl.Event],
) -> List[pyopencl.Event]: ) -> list[pyopencl.Event]:
return [xr_kernel(x, p, r, v, alpha, wait_for=e)] return [xr_kernel(x, p, r, v, alpha, wait_for=e)]
return xr_update return xr_update
def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex, complex]]: def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex, complex]]:
""" """
Return a function Return a function
ri_update(r, e) ri_update(r, e)
@ -272,9 +281,9 @@ def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex
arguments=ctype + ' *r', arguments=ctype + ' *r',
) )
def ri_update(r: pyopencl.array.Array, e: List[pyopencl.Event]) -> Tuple[complex, complex]: def ri_update(r: pyopencl.array.Array, e: list[pyopencl.Event]) -> tuple[complex, complex]:
g = ri_kernel(r, wait_for=e).astype(ri_dtype).get() g = ri_kernel(r, wait_for=e).astype(ri_dtype).get()
rr, ri, ii = [g[q] for q in 'xyz'] rr, ri, ii = (g[qq] for qq in 'xyz')
rho = rr + 2j * ri - ii rho = rr + 2j * ri - ii
err = rr + ii err = rr + ii
return rho, err return rho, err
@ -315,7 +324,7 @@ def create_p_step(context: pyopencl.Context) -> operation:
p: pyopencl.array.Array, p: pyopencl.array.Array,
r: pyopencl.array.Array, r: pyopencl.array.Array,
beta: complex, beta: complex,
e: List[pyopencl.Event]) -> List[pyopencl.Event]: e: list[pyopencl.Event]) -> list[pyopencl.Event]:
return [p_kernel(p, r, beta, wait_for=e)] return [p_kernel(p, r, beta, wait_for=e)]
return p_update return p_update
@ -350,7 +359,7 @@ def create_dot(context: pyopencl.Context) -> Callable[..., complex]:
def dot( def dot(
p: pyopencl.array.Array, p: pyopencl.array.Array,
v: pyopencl.array.Array, v: pyopencl.array.Array,
e: List[pyopencl.Event], e: list[pyopencl.Event],
) -> complex: ) -> complex:
g = dot_kernel(p, v, wait_for=e) g = dot_kernel(p, v, wait_for=e)
return g.get() return g.get()
@ -406,11 +415,11 @@ def create_a_csr(context: pyopencl.Context) -> operation:
) )
def spmv( def spmv(
v_out, v_out: pyopencl.array.Array,
m, m: CSRMatrix,
v_in, v_in: pyopencl.array.Array,
e: List[pyopencl.Event], e: list[pyopencl.Event],
) -> List[pyopencl.Event]: ) -> list[pyopencl.Event]:
return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)] return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)]
return spmv return spmv

View File

@ -35,10 +35,10 @@ classifiers = [
"License :: OSI Approved :: GNU Affero General Public License v3", "License :: OSI Approved :: GNU Affero General Public License v3",
"Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering",
] ]
requires-python = ">=3.8" requires-python = ">=3.11"
dynamic = ["version"] dynamic = ["version"]
dependencies = [ dependencies = [
"numpy~=1.21", "numpy>=1.26",
"pyopencl", "pyopencl",
"jinja2", "jinja2",
"meanas>=0.5", "meanas>=0.5",
@ -46,3 +46,51 @@ dependencies = [
[tool.hatch.version] [tool.hatch.version]
path = "opencl_fdfd/__init__.py" path = "opencl_fdfd/__init__.py"
[tool.ruff]
exclude = [
".git",
"dist",
]
line-length = 145
indent-width = 4
lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
lint.select = [
"NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG",
"C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT",
"ARG", "PL", "R", "TRY",
"G010", "G101", "G201", "G202",
"Q002", "Q003", "Q004",
]
lint.ignore = [
#"ANN001", # No annotation
"ANN002", # *args
"ANN003", # **kwargs
"ANN401", # Any
"ANN101", # self: Self
"SIM108", # single-line if / else assignment
"RET504", # x=y+z; return x
"PIE790", # unnecessary pass
"ISC003", # non-implicit string concatenation
"C408", # dict(x=y) instead of {'x': y}
"PLR09", # Too many xxx
"PLR2004", # magic number
"PLC0414", # import x as x
"TRY003", # Long exception message
]
[[tool.mypy.overrides]]
module = [
"scipy",
"scipy.optimize",
"scipy.linalg",
"scipy.sparse",
"scipy.sparse.linalg",
"pyopencl",
"pyopencl.array",
"pyopencl.elementwise",
"pyopencl.reduction",
]
ignore_missing_imports = true