diff --git a/.gitignore b/.gitignore index 19e276a..2ec014d 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,3 @@ target/ # PyCharm .idea/ - -.mypy_cache/ -.pytest_cache/ diff --git a/README.md b/README.md index 9347e21..896001e 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ electromagnetic solver implemented in Python and OpenCL. **Capabilities:** * Arbitrary distributions of the following: - * Dielectric constant (`epsilon`) - * Magnetic permeabilty (`mu`) - * Perfect electric conductor (`PEC`) - * Perfect magnetic conductor (`PMC`) + * Dielectric constant (```epsilon```) + * Magnetic permeabilty (```mu```) + * Perfect electric conductor (```PEC```) + * Perfect magnetic conductor (```PMC```) * Variable-sized rectangular grids * Stretched-coordinate PMLs (complex cell sizes allowed) @@ -17,10 +17,10 @@ Currently, only periodic boundary conditions are included. PEC/PMC boundaries can be implemented by drawing PEC/PMC cells near the edges. Bloch boundary conditions are not included but wouldn't be very hard to add. -The default solver `opencl_fdfd.cg_solver(...)` located in main.py +The default solver ```opencl_fdfd.cg_solver(...)``` located in main.py implements the E-field wave operator directly (ie, as a list of OpenCL instructions rather than a matrix). Additionally, there is a slower -(and slightly more versatile) solver in `csr.py` which attempts to solve +(and slightly more versatile) solver in ```csr.py``` which attempts to solve an arbitrary sparse matrix in compressed sparse row (CSR) format using the same conjugate gradient method as the default solver. The CSR solver is significantly slower, but can be very useful for testing alternative @@ -34,11 +34,11 @@ generalization to multiple GPUs should be pretty straightforward ## Installation **Dependencies:** -* python 3 (written and tested with 3.7) +* python 3 (written and tested with 3.5) * numpy * pyopencl * jinja2 -* [meanas](https://mpxd.net/code/jan/meanas) (>=0.5) +* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools) (>=0.2) Install with pip, via git: @@ -49,14 +49,14 @@ pip install git+https://mpxd.net/code/jan/opencl_fdfd.git@release ## Use -See the documentation for `opencl_fdfd.cg_solver(...)` +See the documentation for ```opencl_fdfd.cg_solver(...)``` (located in ```main.py```) for details about how to call the solver. The FDFD arguments are identical to those in -`meanas.solvers.generic(...)`, and a few solver-specific +```fdfd_tools.solvers.generic(...)```, and a few solver-specific arguments are available. - + An alternate (slower) FDFD solver and a general gpu-based sparse matrix -solver is available in `csr.py`. These aren't particularly +solver is available in ```csr.py```. These aren't particularly well-optimized, and something like [MAGMA](http://icl.cs.utk.edu/magma/index.html) would probably be a better choice if you absolutely need to solve arbitrary sparse matrices diff --git a/opencl_fdfd/LICENSE.md b/opencl_fdfd/LICENSE.md deleted file mode 120000 index 7eabdb1..0000000 --- a/opencl_fdfd/LICENSE.md +++ /dev/null @@ -1 +0,0 @@ -../LICENSE.md \ No newline at end of file diff --git a/opencl_fdfd/README.md b/opencl_fdfd/README.md deleted file mode 120000 index 32d46ee..0000000 --- a/opencl_fdfd/README.md +++ /dev/null @@ -1 +0,0 @@ -../README.md \ No newline at end of file diff --git a/opencl_fdfd/__init__.py b/opencl_fdfd/__init__.py index a6f9b28..a1d54c6 100644 --- a/opencl_fdfd/__init__.py +++ b/opencl_fdfd/__init__.py @@ -31,7 +31,7 @@ Dependencies: - - meanas ( https://mpxd.net/code/jan/meanas ) + - fdfd_tools ( https://mpxd.net/code/jan/fdfd_tools ) - numpy - pyopencl - jinja2 @@ -40,5 +40,5 @@ from .main import cg_solver __author__ = 'Jan Petykiewicz' -__version__ = '0.4' -version = __version__ + +version = '0.3' diff --git a/opencl_fdfd/csr.py b/opencl_fdfd/csr.py index 0f5a837..9761fc0 100644 --- a/opencl_fdfd/csr.py +++ b/opencl_fdfd/csr.py @@ -6,7 +6,7 @@ CSRMatrix sparse matrix representation. The FDFD solver (fdfd_cg_solver()) solves an FDFD problem by creating a sparse matrix representing the problem (using -meanas) and then passing it to cg(), which performs a +fdfd_tools) and then passing it to cg(), which performs a conjugate gradient solve. cg() is capable of solving arbitrary sparse matrices which @@ -14,18 +14,16 @@ satisfy the constraints for the 'conjugate gradient' algorithm (positive definite, symmetric) and some that don't. """ -from typing import Dict, Any, Optional +from typing import Dict, Any import time import logging import numpy -from numpy.typing import NDArray, ArrayLike from numpy.linalg import norm import pyopencl import pyopencl.array -import scipy -import meanas.fdfd.solvers +import fdfd_tools.solvers from . import ops @@ -35,45 +33,39 @@ __author__ = 'Jan Petykiewicz' logger = logging.getLogger(__name__) -class CSRMatrix: +class CSRMatrix(object): """ Matrix stored in Compressed Sparse Row format, in GPU RAM. """ - row_ptr: pyopencl.array.Array - col_ind: pyopencl.array.Array - data: pyopencl.array.Array + row_ptr = None # type: pyopencl.array.Array + col_ind = None # type: pyopencl.array.Array + data = None # type: pyopencl.array.Array - def __init__( - self, - queue: pyopencl.CommandQueue, - m: 'scipy.sparse.csr_matrix', - ) -> None: + def __init__(self, + queue: pyopencl.CommandQueue, + m: 'scipy.sparse.csr_matrix'): self.row_ptr = pyopencl.array.to_device(queue, m.indptr) self.col_ind = pyopencl.array.to_device(queue, m.indices) self.data = pyopencl.array.to_device(queue, m.data.astype(numpy.complex128)) -def cg( - A: 'scipy.sparse.csr_matrix', - b: ArrayLike, - max_iters: int = 10000, - err_threshold: float = 1e-6, - context: Optional[pyopencl.Context] = None, - queue: Optional[pyopencl.CommandQueue] = None, - ) -> NDArray: +def cg(A: 'scipy.sparse.csr_matrix', + b: numpy.ndarray, + max_iters: int = 10000, + err_threshold: float = 1e-6, + context: pyopencl.Context = None, + queue: pyopencl.CommandQueue = None, + ) -> numpy.ndarray: """ General conjugate-gradient solver for sparse matrices, where A @ x = b. - Args: - A: Matrix to solve (CSR format) - b: Right-hand side vector (dense ndarray) - max_iters: Maximum number of iterations - err_threshold: Error threshold for successful solve, relative to norm(b) - context: PyOpenCL context. Will be created if not given. - queue: PyOpenCL command queue. Will be created if not given. - - Returns: - Solution vector x; returned even if solve doesn't converge. + :param A: Matrix to solve (CSR format) + :param b: Right-hand side vector (dense ndarray) + :param max_iters: Maximum number of iterations + :param err_threshold: Error threshold for successful solve, relative to norm(b) + :param context: PyOpenCL context. Will be created if not given. + :param queue: PyOpenCL command queue. Will be created if not given. + :return: Solution vector x; returned even if solve doesn't converge. """ start_time = time.perf_counter() @@ -114,11 +106,11 @@ def cg( _, err2 = rhoerr_step(r, []) b_norm = numpy.sqrt(err2) - logging.debug(f'b_norm check: {b_norm}') + logging.debug('b_norm check: ', b_norm) success = False for k in range(max_iters): - logging.debug(f'[{k:06d}] rho {rho:.4} alpha {alpha:4.4}') + logging.debug('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha)) rho_prev = rho e = xr_step(x, p, r, v, alpha, []) @@ -126,7 +118,7 @@ def cg( errs += [numpy.sqrt(err2) / b_norm] - logging.debug(f'err {errs[-1]}') + logging.debug('err {}'.format(errs[-1])) if errs[-1] < err_threshold: success = True @@ -136,8 +128,8 @@ def cg( e = a_step(v, m, p, e) alpha = rho / dot(p, v, e) - if k % 1000 == 0: - logger.info(f'iteration {k}') + if verbose and k % 1000 == 0: + logging.info('iteration {}'.format(k)) ''' Done solving @@ -150,46 +142,38 @@ def cg( logging.info('Solve success') else: logging.warning('Solve failure') - logging.info(f'{k} iterations in {time_elapsed} sec: {k / time_elapsed} iterations/sec') - logging.debug(f'final error {errs[-1]}') - logging.debug(f'overhead {start_time2 - start_time} sec') + logging.info('{} iterations in {} sec: {} iterations/sec \ + '.format(k, time_elapsed, k / time_elapsed)) + logging.debug('final error {}'.format(errs[-1])) + logging.debug('overhead {} sec'.format(start_time2 - start_time)) - residual = norm(A @ x - b) / norm(b) - logging.info(f'Final residual: {residual}') + logging.info('Final residual: {}'.format(norm(A @ x - b) / norm(b))) return x -def fdfd_cg_solver( - solver_opts: Optional[Dict[str, Any]] = None, - **fdfd_args - ) -> NDArray: +def fdfd_cg_solver(solver_opts: Dict[str, Any] = None, + **fdfd_args + ) -> numpy.ndarray: """ Conjugate gradient FDFD solver using CSR sparse matrices, mainly for testing and development since it's much slower than the solver in main.py. - Calls meanas.fdfd.solvers.generic( - **fdfd_args, - matrix_solver=opencl_fdfd.csr.cg, - matrix_solver_opts=solver_opts, - ) + Calls fdfd_tools.solvers.generic(**fdfd_args, + matrix_solver=opencl_fdfd.csr.cg, + matrix_solver_opts=solver_opts) - Args: - solver_opts: Passed as matrix_solver_opts to fdfd_tools.solver.generic(...). - Default {}. - fdfd_args: Passed as **fdfd_args to fdfd_tools.solver.generic(...). - Should include all of the arguments **except** matrix_solver and matrix_solver_opts - - Returns: - E-field which solves the system. + :param solver_opts: Passed as matrix_solver_opts to fdfd_tools.solver.generic(...). + Default {}. + :param fdfd_args: Passed as **fdfd_args to fdfd_tools.solver.generic(...). + Should include all of the arguments **except** matrix_solver and matrix_solver_opts + :return: E-field which solves the system. """ if solver_opts is None: solver_opts = dict() - x = meanas.fdfd.solvers.generic( - matrix_solver=cg, - matrix_solver_opts=solver_opts, - **fdfd_args, - ) + x = fdfd_tools.solvers.generic(matrix_solver=cg, + matrix_solver_opts=solver_opts, + **fdfd_args) return x diff --git a/opencl_fdfd/main.py b/opencl_fdfd/main.py index 337b4e0..a48aec0 100644 --- a/opencl_fdfd/main.py +++ b/opencl_fdfd/main.py @@ -6,17 +6,16 @@ operator implemented directly as OpenCL arithmetic (rather than as a matrix). """ -from typing import List, Optional, cast +from typing import List import time import logging import numpy -from numpy.typing import NDArray, ArrayLike from numpy.linalg import norm import pyopencl import pyopencl.array -import meanas.fdfd.operators +import fdfd_tools.operators from . import ops @@ -26,52 +25,49 @@ __author__ = 'Jan Petykiewicz' logger = logging.getLogger(__name__) -def cg_solver( - omega: complex, - dxes: List[List[NDArray]], - J: ArrayLike, - epsilon: ArrayLike, - mu: Optional[ArrayLike] = None, - pec: Optional[ArrayLike] = None, - pmc: Optional[ArrayLike] = None, - adjoint: bool = False, - max_iters: int = 40000, - err_threshold: float = 1e-6, - context: Optional[pyopencl.Context] = None, - ) -> NDArray: +def cg_solver(omega: complex, + dxes: List[List[numpy.ndarray]], + J: numpy.ndarray, + epsilon: numpy.ndarray, + mu: numpy.ndarray = None, + pec: numpy.ndarray = None, + pmc: numpy.ndarray = None, + adjoint: bool = False, + max_iters: int = 40000, + err_threshold: float = 1e-6, + context: pyopencl.Context = None, + ) -> numpy.ndarray: """ OpenCL FDFD solver using the iterative conjugate gradient (cg) method and implementing the diagonalized E-field wave operator directly in OpenCL. All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays, - either use meanas.fdmath.vec() or numpy: + either use fdfd_tools.vec() or numpy: f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z]))) - Args: - omega: Complex frequency to solve at. - dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes) - J: Electric current distribution (at E-field locations) - epsilon: Dielectric constant distribution (at E-field locations) - mu: Magnetic permeability distribution (at H-field locations) - pec: Perfect electric conductor distribution - (at E-field locations; non-zero value indicates PEC is present) - pmc: Perfect magnetic conductor distribution - (at H-field locations; non-zero value indicates PMC is present) - adjoint: If true, solves the adjoint problem. - max_iters: Maximum number of iterations. Default 40,000. - err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success. - Default 1e-6. - context: PyOpenCL context to run in. If not given, construct a new context. - - Returns: - E-field which solves the system. Returned even if we did not converge. + :param omega: Complex frequency to solve at. + :param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes) + :param J: Electric current distribution (at E-field locations) + :param epsilon: Dielectric constant distribution (at E-field locations) + :param mu: Magnetic permeability distribution (at H-field locations) + :param pec: Perfect electric conductor distribution + (at E-field locations; non-zero value indicates PEC is present) + :param pmc: Perfect magnetic conductor distribution + (at H-field locations; non-zero value indicates PMC is present) + :param adjoint: If true, solves the adjoint problem. + :param max_iters: Maximum number of iterations. Default 40,000. + :param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success. + Default 1e-6. + :param context: PyOpenCL context to run in. If not given, construct a new context. + :return: E-field which solves the system. Returned even if we did not converge. """ + start_time = time.perf_counter() - shape = [dd.size for dd in dxes[0]] + b = -1j * omega * J - b = -1j * omega * numpy.array(J, copy=False) + shape = [d.size for d in dxes[0]] ''' ** In this comment, I use the following notation: @@ -100,16 +96,15 @@ def cg_solver( We can accomplish all this simply by conjugating everything (except J) and reversing the order of L and R ''' - epsilon = numpy.array(epsilon, copy=False) if adjoint: # Conjugate everything - dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes] + dxes = [[numpy.conj(d) for d in dd] for dd in dxes] omega = numpy.conj(omega) epsilon = numpy.conj(epsilon) if mu is not None: mu = numpy.conj(mu) - L, R = meanas.fdfd.operators.e_full_preconditioners(dxes) + L, R = fdfd_tools.operators.e_full_preconditioners(dxes) if adjoint: b_preconditioned = R @ b @@ -137,7 +132,7 @@ def cg_solver( rho = 1.0 + 0j errs = [] - inv_dxes = [[load_field(1 / numpy.array(dd, copy=False)) for dd in dds] for dds in dxes] + inv_dxes = [[load_field(1 / d) for d in dd] for dd in dxes] oeps = load_field(-omega ** 2 * epsilon) Pl = load_field(L.diagonal()) Pr = load_field(R.diagonal()) @@ -145,18 +140,17 @@ def cg_solver( if mu is None: invm = load_field(numpy.array([])) else: - invm = load_field(1 / numpy.array(mu, copy=False)) - mu = numpy.array(mu, copy=False) + invm = load_field(1 / mu) if pec is None: gpec = load_field(numpy.array([]), dtype=numpy.int8) else: - gpec = load_field(numpy.array(pec, dtype=bool, copy=False), dtype=numpy.int8) + gpec = load_field(pec.astype(bool), dtype=numpy.int8) if pmc is None: gpmc = load_field(numpy.array([]), dtype=numpy.int8) else: - gpmc = load_field(numpy.array(pmc, dtype=bool, copy=False), dtype=numpy.int8) + gpmc = load_field(pmc.astype(bool), dtype=numpy.int8) ''' Generate OpenCL kernels @@ -179,13 +173,13 @@ def cg_solver( _, err2 = rhoerr_step(r, []) b_norm = numpy.sqrt(err2) - logging.debug(f'b_norm check: {b_norm}') + logging.debug('b_norm check: {}'.format(b_norm)) success = False for k in range(max_iters): do_print = (k % 100 == 0) if do_print: - logger.debug(f'[{k:06d}] rho {rho:.4} alpha {alpha:4.4}') + logger.debug('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha)) rho_prev = rho e = xr_step(x, p, r, v, alpha, []) @@ -194,7 +188,7 @@ def cg_solver( errs += [numpy.sqrt(err2) / b_norm] if do_print: - logger.debug(f'err {errs[-1]}') + logger.debug('err {}'.format(errs[-1])) if errs[-1] < err_threshold: success = True @@ -205,7 +199,7 @@ def cg_solver( alpha = rho / dot(p, v, e) if k % 1000 == 0: - logger.info(f'iteration {k}') + logger.info('iteration {}'.format(k)) ''' Done solving @@ -222,16 +216,15 @@ def cg_solver( logger.info('Solve success') else: logger.warning('Solve failure') - logger.info(f'{k} iterations in {time_elapsed} sec: {k / time_elapsed} iterations/sec') - logger.debug(f'final error {errs[-1]}') - logger.debug(f'overhead {start_time2 - start_time} sec') + logger.info('{} iterations in {} sec: {} iterations/sec \ + '.format(k, time_elapsed, k / time_elapsed)) + logger.debug('final error {}'.format(errs[-1])) + logger.debug('overhead {} sec'.format(start_time2 - start_time)) - A0 = meanas.fdfd.operators.e_full(omega, dxes, epsilon, mu).tocsr() + A0 = fdfd_tools.operators.e_full(omega, dxes, epsilon, mu).tocsr() if adjoint: # Remember we conjugated all the contents of A earlier A0 = A0.T - - residual = norm(A0 @ x - b) / norm(b) - logger.info(f'Post-everything residual: {residual}') + logger.info('Post-everything residual: {}'.format(norm(A0 @ x - b) / norm(b))) return x diff --git a/opencl_fdfd/ops.py b/opencl_fdfd/ops.py index 32f2400..2e0f160 100644 --- a/opencl_fdfd/ops.py +++ b/opencl_fdfd/ops.py @@ -7,11 +7,10 @@ kernels for use by the other solvers. See kernels/ for any of the .cl files loaded in this file. """ -from typing import List, Callable, Union, Type, Sequence, Optional, Tuple +from typing import List, Callable import logging import numpy -from numpy.typing import NDArray, ArrayLike import jinja2 import pyopencl @@ -29,17 +28,12 @@ jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) operation = Callable[..., List[pyopencl.Event]] -def type_to_C( - float_type: Type, - ) -> str: +def type_to_C(float_type: numpy.float32 or numpy.float64) -> str: """ Returns a string corresponding to the C equivalent of a numpy type. - Args: - float_type: numpy type: float32, float64, complex64, complex128 - - Returns: - string containing the corresponding C type (eg. 'double') + :param float_type: numpy type: float32, float64, complex64, complex128 + :return: string containing the corresponding C type (eg. 'double') """ types = { numpy.float32: 'float', @@ -74,13 +68,12 @@ def ptrs(*args: str) -> List[str]: return [ctype + ' *' + s for s in args] -def create_a( - context: pyopencl.Context, - shape: ArrayLike, - mu: bool = False, - pec: bool = False, - pmc: bool = False, - ) -> operation: +def create_a(context: pyopencl.Context, + shape: numpy.ndarray, + mu: bool = False, + pec: bool = False, + pmc: bool = False, + ) -> operation: """ Return a function which performs (A @ p), where A is the FDFD wave equation for E-field. @@ -101,15 +94,12 @@ def create_a( and returns a list of pyopencl.Event. - Args: - context: PyOpenCL context - shape: Dimensions of the E-field - mu: False iff (mu == 1) everywhere - pec: False iff no PEC anywhere - pmc: False iff no PMC anywhere - - Returns: - Function for computing (A @ p) + :param context: PyOpenCL context + :param shape: Dimensions of the E-field + :param mu: False iff (mu == 1) everywhere + :param pec: False iff no PEC anywhere + :param pmc: False iff no PMC anywhere + :return: Function for computing (A @ p) """ common_source = jinja_env.get_template('common.cl').render(shape=shape) @@ -123,67 +113,45 @@ def create_a( Convert p to initial E (ie, apply right preconditioner and PEC) ''' p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec) - P2E_kernel = ElementwiseKernel( - context, - name='P2E', - preamble=preamble, - operation=p2e_source, - arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg), - ) + P2E_kernel = ElementwiseKernel(context, + name='P2E', + preamble=preamble, + operation=p2e_source, + arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg)) ''' Calculate intermediate H from intermediate E ''' - e2h_source = jinja_env.get_template('e2h.cl').render( - mu=mu, - pmc=pmc, - common_cl=common_source, - ) - E2H_kernel = ElementwiseKernel( - context, - name='E2H', - preamble=preamble, - operation=e2h_source, - arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des), - ) + e2h_source = jinja_env.get_template('e2h.cl').render(mu=mu, + pmc=pmc, + common_cl=common_source) + E2H_kernel = ElementwiseKernel(context, + name='E2H', + preamble=preamble, + operation=e2h_source, + arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des)) ''' Calculate final E (including left preconditioner) ''' - h2e_source = jinja_env.get_template('h2e.cl').render( - pec=pec, - common_cl=common_source, - ) - H2E_kernel = ElementwiseKernel( - context, - name='H2E', - preamble=preamble, - operation=h2e_source, - arguments=', '.join(ptrs('E', 'H', 'oeps', 'Pl') + pec_arg + dhs), - ) + h2e_source = jinja_env.get_template('h2e.cl').render(pec=pec, + common_cl=common_source) + H2E_kernel = ElementwiseKernel(context, + name='H2E', + preamble=preamble, + operation=h2e_source, + arguments=', '.join(ptrs('E', 'H', 'oeps', 'Pl') + pec_arg + dhs)) - def spmv( - E: pyopencl.array.Array, - H: pyopencl.array.Array, - p: pyopencl.array.Array, - idxes: Sequence[Sequence[pyopencl.array.Array]], - oeps: pyopencl.array.Array, - inv_mu: Optional[pyopencl.array.Array], - pec: Optional[pyopencl.array.Array], - pmc: Optional[pyopencl.array.Array], - Pl: pyopencl.array.Array, - Pr: pyopencl.array.Array, - e: List[pyopencl.Event], - ) -> List[pyopencl.Event]: + def spmv(E, H, p, idxes, oeps, inv_mu, pec, pmc, Pl, Pr, e): e2 = P2E_kernel(E, p, Pr, pec, wait_for=e) e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2]) e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2]) return [e2] - logger.debug(f'Preamble: \n{preamble}') - logger.debug(f'p2e: \n{p2e_source}') - logger.debug(f'e2h: \n{e2h_source}') - logger.debug(f'h2e: \n{h2e_source}') + logger.debug('Preamble: \n{}'.format(preamble)) + logger.debug('p2e: \n{}'.format(p2e_source)) + logger.debug('e2h: \n{}'.format(e2h_source)) + logger.debug('h2e: \n{}'.format(h2e_source)) return spmv @@ -199,11 +167,8 @@ def create_xr_step(context: pyopencl.Context) -> operation: after waiting for all in the list e and returns a list of pyopencl.Event - Args: - context: PyOpenCL context - - Returns: - Function for performing x and r updates + :param context: PyOpenCL context + :return: Function for performing x and r updates """ update_xr_source = ''' x[i] = add(x[i], mul(alpha, p[i])); @@ -212,28 +177,19 @@ def create_xr_step(context: pyopencl.Context) -> operation: xr_args = ', '.join(ptrs('x', 'p', 'r', 'v') + [ctype + ' alpha']) - xr_kernel = ElementwiseKernel( - context, - name='XR', - preamble=preamble, - operation=update_xr_source, - arguments=xr_args, - ) + xr_kernel = ElementwiseKernel(context, + name='XR', + preamble=preamble, + operation=update_xr_source, + arguments=xr_args) - def xr_update( - x: pyopencl.array.Array, - p: pyopencl.array.Array, - r: pyopencl.array.Array, - v: pyopencl.array.Array, - alpha: complex, - e: List[pyopencl.Event], - ) -> List[pyopencl.Event]: + def xr_update(x, p, r, v, alpha, e): return [xr_kernel(x, p, r, v, alpha, wait_for=e)] return xr_update -def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex, complex]]: +def create_rhoerr_step(context: pyopencl.Context) -> operation: """ Return a function ri_update(r, e) @@ -244,11 +200,8 @@ def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex after waiting for all pyopencl.Event in the list e and returns a list of pyopencl.Event - Args: - context: PyOpenCL context - - Returns: - Function for performing x and r updates + :param context: PyOpenCL context + :return: Function for performing x and r updates """ update_ri_source = ''' @@ -260,18 +213,16 @@ def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., Tuple[complex # Use a vector type (double3) to make the reduction simpler ri_dtype = pyopencl.array.vec.double3 - ri_kernel = ReductionKernel( - context, - name='RHOERR', - preamble=preamble, - dtype_out=ri_dtype, - neutral='(double3)(0.0, 0.0, 0.0)', - map_expr=update_ri_source, - reduce_expr='a+b', - arguments=ctype + ' *r', - ) + ri_kernel = ReductionKernel(context, + name='RHOERR', + preamble=preamble, + dtype_out=ri_dtype, + neutral='(double3)(0.0, 0.0, 0.0)', + map_expr=update_ri_source, + reduce_expr='a+b', + arguments=ctype + ' *r') - def ri_update(r: pyopencl.array.Array, e: List[pyopencl.Event]) -> Tuple[complex, complex]: + def ri_update(r, e): g = ri_kernel(r, wait_for=e).astype(ri_dtype).get() rr, ri, ii = [g[q] for q in 'xyz'] rho = rr + 2j * ri - ii @@ -291,66 +242,48 @@ def create_p_step(context: pyopencl.Context) -> operation: after waiting for all pyopencl.Event in the list e and returns a list of pyopencl.Event - Args: - context: PyOpenCL context - - Returns: - Function for performing the p update + :param context: PyOpenCL context + :return: Function for performing the p update """ update_p_source = ''' p[i] = add(r[i], mul(beta, p[i])); ''' p_args = ptrs('p', 'r') + [ctype + ' beta'] - p_kernel = ElementwiseKernel( - context, - name='P', - preamble=preamble, - operation=update_p_source, - arguments=', '.join(p_args), - ) + p_kernel = ElementwiseKernel(context, + name='P', + preamble=preamble, + operation=update_p_source, + arguments=', '.join(p_args)) - def p_update( - p: pyopencl.array.Array, - r: pyopencl.array.Array, - beta: complex, - e: List[pyopencl.Event]) -> List[pyopencl.Event]: + def p_update(p, r, beta, e): return [p_kernel(p, r, beta, wait_for=e)] return p_update -def create_dot(context: pyopencl.Context) -> Callable[..., complex]: +def create_dot(context: pyopencl.Context) -> operation: """ Return a function for performing the dot product p @ v with the signature - dot(p, v, e) -> complex + dot(p, v, e) -> float - Args: - context: PyOpenCL context - - Returns: - Function for performing the dot product + :param context: PyOpenCL context + :return: Function for performing the dot product """ dot_dtype = numpy.complex128 - dot_kernel = ReductionKernel( - context, - name='dot', - preamble=preamble, - dtype_out=dot_dtype, - neutral='zero', - map_expr='mul(p[i], v[i])', - reduce_expr='add(a, b)', - arguments=ptrs('p', 'v'), - ) + dot_kernel = ReductionKernel(context, + name='dot', + preamble=preamble, + dtype_out=dot_dtype, + neutral='zero', + map_expr='mul(p[i], v[i])', + reduce_expr='add(a, b)', + arguments=ptrs('p', 'v')) - def dot( - p: pyopencl.array.Array, - v: pyopencl.array.Array, - e: List[pyopencl.Event], - ) -> complex: + def dot(p, v, e): g = dot_kernel(p, v, wait_for=e) return g.get() @@ -371,11 +304,8 @@ def create_a_csr(context: pyopencl.Context) -> operation: The function waits on all the pyopencl.Event in e before running, and returns a list of pyopencl.Event. - Args: - context: PyOpenCL context - - Returns: - Function for sparse (M @ v) operation where M is in CSR format + :param context: PyOpenCL context + :return: Function for sparse (M @ v) operation where M is in CSR format """ spmv_source = ''' int start = m_row_ptr[i]; @@ -396,20 +326,13 @@ def create_a_csr(context: pyopencl.Context) -> operation: m_args = 'int *m_row_ptr, int *m_col_ind, ' + ctype + ' *m_data' v_in_args = ctype + ' *v_in' - spmv_kernel = ElementwiseKernel( - context, - name='csr_spmv', - preamble=preamble, - operation=spmv_source, - arguments=', '.join((v_out_args, m_args, v_in_args)), - ) + spmv_kernel = ElementwiseKernel(context, + name='csr_spmv', + preamble=preamble, + operation=spmv_source, + arguments=', '.join((v_out_args, m_args, v_in_args))) - def spmv( - v_out, - m, - v_in, - e: List[pyopencl.Event], - ) -> List[pyopencl.Event]: + def spmv(v_out, m, v_in, e): return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)] return spmv diff --git a/opencl_fdfd/py.typed b/opencl_fdfd/py.typed deleted file mode 100644 index e69de29..0000000 diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index a9430f5..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,48 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "opencl_fdfd" -description = "OpenCL FDFD solver" -readme = "README.md" -license = { file = "LICENSE.md" } -authors = [ - { name="Jan Petykiewicz", email="jan@mpxd.net" }, - ] -homepage = "https://mpxd.net/code/jan/opencl_fdfd" -repository = "https://mpxd.net/code/jan/opencl_fdfd" -keywords = [ - "FDFD", - "finite", - "difference", - "frequency", - "domain", - "simulation", - "optics", - "electromagnetic", - "dielectric", - "PML", - "solver", - "FDTD", - ] -classifiers = [ - "Programming Language :: Python :: 3", - "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - "Intended Audience :: Manufacturing", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: GNU Affero General Public License v3", - "Topic :: Scientific/Engineering", - ] -requires-python = ">=3.8" -dynamic = ["version"] -dependencies = [ - "numpy~=1.21", - "pyopencl", - "jinja2", - "meanas>=0.5", - ] - -[tool.hatch.version] -path = "opencl_fdfd/__init__.py" diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..1cbf8f0 --- /dev/null +++ b/setup.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 + +from setuptools import setup, find_packages +import opencl_fdfd + +with open('README.md', 'r') as f: + long_description = f.read() + +setup(name='opencl_fdfd', + version=opencl_fdfd.version, + description='OpenCL FDFD solver', + long_description=long_description, + long_description_content_type='text/markdown', + author='Jan Petykiewicz', + author_email='anewusername@gmail.com', + url='https://mpxd.net/code/jan/opencl_fdfd', + packages=find_packages(), + package_data={ + 'opencl_fdfd': ['kernels/*'] + }, + install_requires=[ + 'numpy', + 'pyopencl', + 'jinja2', + 'fdfd_tools>=0.3', + ], + extras_require={ + }, + ) +