Compare commits

..

No commits in common. "master" and "v0.4" have entirely different histories.
master ... v0.4

5 changed files with 104 additions and 159 deletions

View File

@ -37,7 +37,7 @@
- jinja2
"""
from .main import cg_solver as cg_solver
from .main import cg_solver
__author__ = 'Jan Petykiewicz'
__version__ = '0.4'

View File

@ -14,23 +14,23 @@ satisfy the constraints for the 'conjugate gradient' algorithm
(positive definite, symmetric) and some that don't.
"""
from typing import Any, TYPE_CHECKING
from typing import Dict, Any, Optional
import time
import logging
import numpy
from numpy.typing import NDArray, ArrayLike
from numpy.linalg import norm
from numpy import complexfloating
import pyopencl
import pyopencl.array
import scipy
import meanas.fdfd.solvers
from . import ops
if TYPE_CHECKING:
import scipy
__author__ = 'Jan Petykiewicz'
logger = logging.getLogger(__name__)
@ -58,9 +58,9 @@ def cg(
b: ArrayLike,
max_iters: int = 10000,
err_threshold: float = 1e-6,
context: pyopencl.Context | None = None,
queue: pyopencl.CommandQueue | None = None,
) -> NDArray[complexfloating]:
context: Optional[pyopencl.Context] = None,
queue: Optional[pyopencl.CommandQueue] = None,
) -> NDArray:
"""
General conjugate-gradient solver for sparse matrices, where A @ x = b.
@ -84,10 +84,10 @@ def cg(
if queue is None:
queue = pyopencl.CommandQueue(context)
def load_field(v: NDArray[numpy.complexfloating], dtype: type = numpy.complex128) -> pyopencl.array.Array:
def load_field(v, dtype=numpy.complex128):
return pyopencl.array.to_device(queue, v.astype(dtype))
r = load_field(numpy.asarray(b))
r = load_field(b)
x = pyopencl.array.zeros_like(r)
v = pyopencl.array.zeros_like(r)
p = pyopencl.array.zeros_like(r)
@ -98,18 +98,18 @@ def cg(
m = CSRMatrix(queue, A)
#
# Generate OpenCL kernels
#
'''
Generate OpenCL kernels
'''
a_step = ops.create_a_csr(context)
xr_step = ops.create_xr_step(context)
rhoerr_step = ops.create_rhoerr_step(context)
p_step = ops.create_p_step(context)
dot = ops.create_dot(context)
#
# Start the solve
#
'''
Start the solve
'''
start_time2 = time.perf_counter()
_, err2 = rhoerr_step(r, [])
@ -139,9 +139,9 @@ def cg(
if k % 1000 == 0:
logger.info(f'iteration {k}')
#
# Done solving
#
'''
Done solving
'''
time_elapsed = time.perf_counter() - start_time
x = x.get()
@ -160,9 +160,9 @@ def cg(
def fdfd_cg_solver(
solver_opts: dict[str, Any] | None = None,
**fdfd_args,
) -> NDArray[complexfloating]:
solver_opts: Optional[Dict[str, Any]] = None,
**fdfd_args
) -> NDArray:
"""
Conjugate gradient FDFD solver using CSR sparse matrices, mainly for
testing and development since it's much slower than the solver in main.py.

View File

@ -5,13 +5,14 @@ This file holds the default FDFD solver, which uses an E-field wave
operator implemented directly as OpenCL arithmetic (rather than as
a matrix).
"""
from typing import List, Optional, cast
import time
import logging
import numpy
from numpy.typing import NDArray, ArrayLike
from numpy.linalg import norm
from numpy import floating, complexfloating
import pyopencl
import pyopencl.array
@ -20,21 +21,23 @@ import meanas.fdfd.operators
from . import ops
__author__ = 'Jan Petykiewicz'
logger = logging.getLogger(__name__)
def cg_solver(
omega: complex,
dxes: list[list[NDArray[floating | complexfloating]]],
dxes: List[List[NDArray]],
J: ArrayLike,
epsilon: ArrayLike,
mu: ArrayLike | None = None,
pec: ArrayLike | None = None,
pmc: ArrayLike | None = None,
mu: Optional[ArrayLike] = None,
pec: Optional[ArrayLike] = None,
pmc: Optional[ArrayLike] = None,
adjoint: bool = False,
max_iters: int = 40000,
err_threshold: float = 1e-6,
context: pyopencl.Context | None = None,
context: Optional[pyopencl.Context] = None,
) -> NDArray:
"""
OpenCL FDFD solver using the iterative conjugate gradient (cg) method
@ -68,7 +71,7 @@ def cg_solver(
shape = [dd.size for dd in dxes[0]]
b = -1j * omega * numpy.asarray(J)
b = -1j * omega * numpy.array(J, copy=False)
'''
** In this comment, I use the following notation:
@ -97,8 +100,7 @@ def cg_solver(
We can accomplish all this simply by conjugating everything (except J) and
reversing the order of L and R
'''
epsilon = numpy.asarray(epsilon)
epsilon = numpy.array(epsilon, copy=False)
if adjoint:
# Conjugate everything
dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes]
@ -106,20 +108,23 @@ def cg_solver(
epsilon = numpy.conj(epsilon)
if mu is not None:
mu = numpy.conj(mu)
assert isinstance(epsilon, NDArray[floating] | NDArray[complexfloating])
L, R = meanas.fdfd.operators.e_full_preconditioners(dxes)
b_preconditioned = (R if adjoint else L) @ b
#
# Allocate GPU memory and load in data
#
if adjoint:
b_preconditioned = R @ b
else:
b_preconditioned = L @ b
'''
Allocate GPU memory and load in data
'''
if context is None:
context = pyopencl.create_some_context(interactive=True)
queue = pyopencl.CommandQueue(context)
def load_field(v: NDArray[complexfloating | floating], dtype: type = numpy.complex128) -> pyopencl.array.Array:
def load_field(v, dtype=numpy.complex128):
return pyopencl.array.to_device(queue, v.astype(dtype))
r = load_field(b_preconditioned) # load preconditioned b into r
@ -132,31 +137,31 @@ def cg_solver(
rho = 1.0 + 0j
errs = []
inv_dxes = [[load_field(1 / numpy.asarray(dd)) for dd in dds] for dds in dxes]
oeps = load_field(-omega * omega * epsilon)
inv_dxes = [[load_field(1 / numpy.array(dd, copy=False)) for dd in dds] for dds in dxes]
oeps = load_field(-omega ** 2 * epsilon)
Pl = load_field(L.diagonal())
Pr = load_field(R.diagonal())
if mu is None:
invm = load_field(numpy.array([]))
else:
invm = load_field(1 / numpy.asarray(mu))
mu = numpy.asarray(mu)
invm = load_field(1 / numpy.array(mu, copy=False))
mu = numpy.array(mu, copy=False)
if pec is None:
gpec = load_field(numpy.array([]), dtype=numpy.int8)
else:
gpec = load_field(numpy.asarray(pec, dtype=bool), dtype=numpy.int8)
gpec = load_field(numpy.array(pec, dtype=bool, copy=False), dtype=numpy.int8)
if pmc is None:
gpmc = load_field(numpy.array([]), dtype=numpy.int8)
else:
gpmc = load_field(numpy.asarray(pmc, dtype=bool), dtype=numpy.int8)
gpmc = load_field(numpy.array(pmc, dtype=bool, copy=False), dtype=numpy.int8)
#
# Generate OpenCL kernels
#
has_mu, has_pec, has_pmc = (qq is not None for qq in (mu, pec, pmc))
'''
Generate OpenCL kernels
'''
has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)]
a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc)
xr_step = ops.create_xr_step(context)
@ -164,17 +169,12 @@ def cg_solver(
p_step = ops.create_p_step(context)
dot = ops.create_dot(context)
def a_step(
E: pyopencl.array.Array,
H: pyopencl.array.Array,
p: pyopencl.array.Array,
events: list[pyopencl.Event],
) -> list[pyopencl.Event]:
def a_step(E, H, p, events):
return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events)
#
# Start the solve
#
'''
Start the solve
'''
start_time2 = time.perf_counter()
_, err2 = rhoerr_step(r, [])
@ -207,13 +207,16 @@ def cg_solver(
if k % 1000 == 0:
logger.info(f'iteration {k}')
#
# Done solving
#
'''
Done solving
'''
time_elapsed = time.perf_counter() - start_time
# Undo preconditioners
x = ((Pl if adjoint else Pr) * x).get()
if adjoint:
x = (Pl * x).get()
else:
x = (Pr * x).get()
if success:
logger.info('Solve success')

View File

@ -7,11 +7,11 @@ kernels for use by the other solvers.
See kernels/ for any of the .cl files loaded in this file.
"""
from collections.abc import Callable, Sequence
from typing import List, Callable, Union, Type, Sequence, Optional, Tuple
import logging
import numpy
from numpy.typing import ArrayLike
from numpy.typing import NDArray, ArrayLike
import jinja2
import pyopencl
@ -20,25 +20,17 @@ from pyopencl.elementwise import ElementwiseKernel
from pyopencl.reduction import ReductionKernel
from .csr import CSRMatrix
logger = logging.getLogger(__name__)
class FDFDError(Exception):
""" Custom error for opencl_fdfd """
pass
# Create jinja2 env on module load
jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
# Return type for the create_opname(...) functions
operation = Callable[..., list[pyopencl.Event]]
operation = Callable[..., List[pyopencl.Event]]
def type_to_C(
float_type: type[numpy.floating | numpy.complexfloating],
float_type: Type,
) -> str:
"""
Returns a string corresponding to the C equivalent of a numpy type.
@ -50,37 +42,35 @@ def type_to_C(
string containing the corresponding C type (eg. 'double')
"""
types = {
numpy.float16: 'half',
numpy.float32: 'float',
numpy.float64: 'double',
numpy.complex64: 'cfloat_t',
numpy.complex128: 'cdouble_t',
}
if float_type not in types:
raise FDFDError('Unsupported type')
raise Exception('Unsupported type')
return types[float_type]
# Type names
ctype = type_to_C(numpy.complex128)
ctype_bare = 'cdouble'
# Preamble for all OpenCL code
preamble = f'''
preamble = '''
#define PYOPENCL_DEFINE_CDOUBLE
#include <pyopencl-complex.h>
//Defines to clean up operation and type names
#define ctype {ctype_bare}_t
#define zero {ctype_bare}_new(0.0, 0.0)
#define add {ctype_bare}_add
#define sub {ctype_bare}_sub
#define mul {ctype_bare}_mul
'''
#define ctype {ctype}_t
#define zero {ctype}_new(0.0, 0.0)
#define add {ctype}_add
#define sub {ctype}_sub
#define mul {ctype}_mul
'''.format(ctype=ctype_bare)
def ptrs(*args: str) -> list[str]:
def ptrs(*args: str) -> List[str]:
return [ctype + ' *' + s for s in args]
@ -129,9 +119,9 @@ def create_a(
des = [ctype + ' *inv_de' + a for a in 'xyz']
dhs = [ctype + ' *inv_dh' + a for a in 'xyz']
#
# Convert p to initial E (ie, apply right preconditioner and PEC)
#
'''
Convert p to initial E (ie, apply right preconditioner and PEC)
'''
p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec)
P2E_kernel = ElementwiseKernel(
context,
@ -141,9 +131,9 @@ def create_a(
arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg),
)
#
# Calculate intermediate H from intermediate E
#
'''
Calculate intermediate H from intermediate E
'''
e2h_source = jinja_env.get_template('e2h.cl').render(
mu=mu,
pmc=pmc,
@ -157,9 +147,9 @@ def create_a(
arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des),
)
#
# Calculate final E (including left preconditioner)
#
'''
Calculate final E (including left preconditioner)
'''
h2e_source = jinja_env.get_template('h2e.cl').render(
pec=pec,
common_cl=common_source,
@ -178,13 +168,13 @@ def create_a(
p: pyopencl.array.Array,
idxes: Sequence[Sequence[pyopencl.array.Array]],
oeps: pyopencl.array.Array,
inv_mu: pyopencl.array.Array | None,
pec: pyopencl.array.Array | None,
pmc: pyopencl.array.Array | None,
inv_mu: Optional[pyopencl.array.Array],
pec: Optional[pyopencl.array.Array],
pmc: Optional[pyopencl.array.Array],
Pl: pyopencl.array.Array,
Pr: pyopencl.array.Array,
e: list[pyopencl.Event],
) -> list[pyopencl.Event]:
e: List[pyopencl.Event],
) -> List[pyopencl.Event]:
e2 = P2E_kernel(E, p, Pr, pec, wait_for=e)
e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2])
e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2])
@ -236,14 +226,14 @@ def create_xr_step(context: pyopencl.Context) -> operation:
r: pyopencl.array.Array,
v: pyopencl.array.Array,
alpha: complex,
e: list[pyopencl.Event],
) -> list[pyopencl.Event]:
e: List[pyopencl.Event],
) -> List[pyopencl.Event]:
return [xr_kernel(x, p, r, v, alpha, wait_for=e)]
return xr_update
def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex, complex]]:
def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex, complex]]:
"""
Return a function
ri_update(r, e)
@ -281,9 +271,9 @@ def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex
arguments=ctype + ' *r',
)
def ri_update(r: pyopencl.array.Array, e: list[pyopencl.Event]) -> tuple[complex, complex]:
def ri_update(r: pyopencl.array.Array, e: List[pyopencl.Event]) -> Tuple[complex, complex]:
g = ri_kernel(r, wait_for=e).astype(ri_dtype).get()
rr, ri, ii = (g[qq] for qq in 'xyz')
rr, ri, ii = [g[q] for q in 'xyz']
rho = rr + 2j * ri - ii
err = rr + ii
return rho, err
@ -324,7 +314,7 @@ def create_p_step(context: pyopencl.Context) -> operation:
p: pyopencl.array.Array,
r: pyopencl.array.Array,
beta: complex,
e: list[pyopencl.Event]) -> list[pyopencl.Event]:
e: List[pyopencl.Event]) -> List[pyopencl.Event]:
return [p_kernel(p, r, beta, wait_for=e)]
return p_update
@ -359,7 +349,7 @@ def create_dot(context: pyopencl.Context) -> Callable[..., complex]:
def dot(
p: pyopencl.array.Array,
v: pyopencl.array.Array,
e: list[pyopencl.Event],
e: List[pyopencl.Event],
) -> complex:
g = dot_kernel(p, v, wait_for=e)
return g.get()
@ -415,11 +405,11 @@ def create_a_csr(context: pyopencl.Context) -> operation:
)
def spmv(
v_out: pyopencl.array.Array,
m: CSRMatrix,
v_in: pyopencl.array.Array,
e: list[pyopencl.Event],
) -> list[pyopencl.Event]:
v_out,
m,
v_in,
e: List[pyopencl.Event],
) -> List[pyopencl.Event]:
return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)]
return spmv

View File

@ -35,10 +35,10 @@ classifiers = [
"License :: OSI Approved :: GNU Affero General Public License v3",
"Topic :: Scientific/Engineering",
]
requires-python = ">=3.11"
requires-python = ">=3.8"
dynamic = ["version"]
dependencies = [
"numpy>=1.26",
"numpy~=1.21",
"pyopencl",
"jinja2",
"meanas>=0.5",
@ -46,51 +46,3 @@ dependencies = [
[tool.hatch.version]
path = "opencl_fdfd/__init__.py"
[tool.ruff]
exclude = [
".git",
"dist",
]
line-length = 145
indent-width = 4
lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
lint.select = [
"NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG",
"C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT",
"ARG", "PL", "R", "TRY",
"G010", "G101", "G201", "G202",
"Q002", "Q003", "Q004",
]
lint.ignore = [
#"ANN001", # No annotation
"ANN002", # *args
"ANN003", # **kwargs
"ANN401", # Any
"ANN101", # self: Self
"SIM108", # single-line if / else assignment
"RET504", # x=y+z; return x
"PIE790", # unnecessary pass
"ISC003", # non-implicit string concatenation
"C408", # dict(x=y) instead of {'x': y}
"PLR09", # Too many xxx
"PLR2004", # magic number
"PLC0414", # import x as x
"TRY003", # Long exception message
]
[[tool.mypy.overrides]]
module = [
"scipy",
"scipy.optimize",
"scipy.linalg",
"scipy.sparse",
"scipy.sparse.linalg",
"pyopencl",
"pyopencl.array",
"pyopencl.elementwise",
"pyopencl.reduction",
]
ignore_missing_imports = true