Compare commits
	
		
			1 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 235e0b6365 | 
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -60,6 +60,3 @@ target/ | ||||
| 
 | ||||
| # PyCharm | ||||
| .idea/ | ||||
| 
 | ||||
| .mypy_cache/ | ||||
| .pytest_cache/ | ||||
|  | ||||
							
								
								
									
										26
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								README.md
									
									
									
									
									
								
							| @ -6,10 +6,10 @@ electromagnetic solver implemented in Python and OpenCL. | ||||
| 
 | ||||
| **Capabilities:** | ||||
| * Arbitrary distributions of the following: | ||||
|     * Dielectric constant (`epsilon`) | ||||
|     * Magnetic permeabilty (`mu`) | ||||
|     * Perfect electric conductor (`PEC`) | ||||
|     * Perfect magnetic conductor (`PMC`) | ||||
|     * Dielectric constant (```epsilon```) | ||||
|     * Magnetic permeabilty (```mu```) | ||||
|     * Perfect electric conductor (```PEC```) | ||||
|     * Perfect magnetic conductor (```PMC```) | ||||
| * Variable-sized rectangular grids | ||||
|     * Stretched-coordinate PMLs (complex cell sizes allowed) | ||||
| 
 | ||||
| @ -17,10 +17,10 @@ Currently, only periodic boundary conditions are included. | ||||
| PEC/PMC boundaries can be implemented by drawing PEC/PMC cells near the edges. | ||||
| Bloch boundary conditions are not included but wouldn't be very hard to add. | ||||
| 
 | ||||
| The default solver `opencl_fdfd.cg_solver(...)` located in main.py | ||||
| The default solver ```opencl_fdfd.cg_solver(...)``` located in main.py | ||||
| implements the E-field wave operator directly (ie, as a list of OpenCL | ||||
| instructions rather than a matrix). Additionally, there is a slower | ||||
| (and slightly more versatile) solver in `csr.py` which attempts to solve | ||||
| (and slightly more versatile) solver in ```csr.py``` which attempts to solve | ||||
| an arbitrary sparse matrix in compressed sparse row (CSR) format using | ||||
| the same conjugate gradient method as the default solver. The CSR solver | ||||
| is significantly slower, but can be very useful for testing alternative | ||||
| @ -34,29 +34,29 @@ generalization to multiple GPUs should be pretty straightforward | ||||
| ## Installation | ||||
| 
 | ||||
| **Dependencies:** | ||||
| * python 3 (written and tested with 3.7) | ||||
| * python 3 (written and tested with 3.5)  | ||||
| * numpy | ||||
| * pyopencl | ||||
| * jinja2 | ||||
| * [meanas](https://mpxd.net/code/jan/meanas) (>=0.5) | ||||
| * [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) (>=0.2) | ||||
| 
 | ||||
| 
 | ||||
| Install with pip, via git: | ||||
| ```bash | ||||
| pip install git+https://mpxd.net/code/jan/opencl_fdfd.git@release | ||||
| pip install git+https://mpxd.net/gogs/jan/opencl_fdfd.git@release | ||||
| ``` | ||||
| 
 | ||||
| 
 | ||||
| ## Use | ||||
| 
 | ||||
| See the documentation for `opencl_fdfd.cg_solver(...)` | ||||
| See the documentation for ```opencl_fdfd.cg_solver(...)``` | ||||
| (located in ```main.py```) for details about how to call the solver. | ||||
| The FDFD arguments are identical to those in | ||||
| `meanas.solvers.generic(...)`, and a few solver-specific | ||||
| ```fdfd_tools.solvers.generic(...)```, and a few solver-specific | ||||
| arguments are available. | ||||
| 
 | ||||
|   | ||||
| An alternate (slower) FDFD solver and a general gpu-based sparse matrix | ||||
| solver is available in `csr.py`. These aren't particularly | ||||
| solver is available in ```csr.py```. These aren't particularly | ||||
| well-optimized, and something like | ||||
| [MAGMA](http://icl.cs.utk.edu/magma/index.html) would probably be a | ||||
| better choice if you absolutely need to solve arbitrary sparse matrices | ||||
|  | ||||
| @ -1 +0,0 @@ | ||||
| ../LICENSE.md | ||||
| @ -1 +0,0 @@ | ||||
| ../README.md | ||||
| @ -31,14 +31,13 @@ | ||||
| 
 | ||||
| 
 | ||||
|   Dependencies: | ||||
|     - meanas    ( https://mpxd.net/code/jan/meanas ) | ||||
|     - fdfd_tools    ( https://mpxd.net/gogs/jan/fdfd_tools ) | ||||
|     - numpy | ||||
|     - pyopencl | ||||
|     - jinja2 | ||||
| """ | ||||
| 
 | ||||
| from .main import cg_solver as cg_solver | ||||
| from .main import cg_solver | ||||
| 
 | ||||
| __author__ = 'Jan Petykiewicz' | ||||
| __version__ = '0.4' | ||||
| version = __version__ | ||||
| 
 | ||||
|  | ||||
| @ -6,7 +6,7 @@ CSRMatrix sparse matrix representation. | ||||
| 
 | ||||
| The FDFD solver (fdfd_cg_solver()) solves an FDFD problem by | ||||
| creating a sparse matrix representing the problem (using | ||||
| meanas) and then passing it to cg(), which performs a | ||||
| fdfd_tools) and then passing it to cg(), which performs a | ||||
| conjugate gradient solve. | ||||
| 
 | ||||
| cg() is capable of solving arbitrary sparse matrices which | ||||
| @ -14,66 +14,54 @@ satisfy the constraints for the 'conjugate gradient' algorithm | ||||
| (positive definite, symmetric) and some that don't. | ||||
| """ | ||||
| 
 | ||||
| from typing import Any, TYPE_CHECKING | ||||
| from typing import Dict, Any | ||||
| import time | ||||
| import logging | ||||
| 
 | ||||
| import numpy | ||||
| from numpy.typing import NDArray, ArrayLike | ||||
| from numpy.linalg import norm | ||||
| from numpy import complexfloating | ||||
| import pyopencl | ||||
| import pyopencl.array | ||||
| import meanas.fdfd.solvers | ||||
| 
 | ||||
| import fdfd_tools.solvers | ||||
| 
 | ||||
| from . import ops | ||||
| 
 | ||||
| if TYPE_CHECKING: | ||||
|     import scipy | ||||
| 
 | ||||
| 
 | ||||
| logger = logging.getLogger(__name__) | ||||
| 
 | ||||
| 
 | ||||
| class CSRMatrix: | ||||
| class CSRMatrix(object): | ||||
|     """ | ||||
|     Matrix stored in Compressed Sparse Row format, in GPU RAM. | ||||
|     """ | ||||
|     row_ptr: pyopencl.array.Array | ||||
|     col_ind: pyopencl.array.Array | ||||
|     data: pyopencl.array.Array | ||||
|     row_ptr = None      # type: pyopencl.array.Array | ||||
|     col_ind = None      # type: pyopencl.array.Array | ||||
|     data = None         # type: pyopencl.array.Array | ||||
| 
 | ||||
|     def __init__( | ||||
|             self, | ||||
|             queue: pyopencl.CommandQueue, | ||||
|             m: 'scipy.sparse.csr_matrix', | ||||
|             ) -> None: | ||||
|     def __init__(self, | ||||
|                  queue: pyopencl.CommandQueue, | ||||
|                  m: 'scipy.sparse.csr_matrix'): | ||||
|         self.row_ptr = pyopencl.array.to_device(queue, m.indptr) | ||||
|         self.col_ind = pyopencl.array.to_device(queue, m.indices) | ||||
|         self.data = pyopencl.array.to_device(queue, m.data.astype(numpy.complex128)) | ||||
| 
 | ||||
| 
 | ||||
| def cg( | ||||
|         A: 'scipy.sparse.csr_matrix', | ||||
|         b: ArrayLike, | ||||
|         max_iters: int = 10000, | ||||
|         err_threshold: float = 1e-6, | ||||
|         context: pyopencl.Context | None = None, | ||||
|         queue: pyopencl.CommandQueue | None = None, | ||||
|         ) -> NDArray[complexfloating]: | ||||
| def cg(A: 'scipy.sparse.csr_matrix', | ||||
|        b: numpy.ndarray, | ||||
|        max_iters: int = 10000, | ||||
|        err_threshold: float = 1e-6, | ||||
|        context: pyopencl.Context = None, | ||||
|        queue: pyopencl.CommandQueue = None, | ||||
|        verbose: bool = False, | ||||
|        ) -> numpy.ndarray: | ||||
|     """ | ||||
|     General conjugate-gradient solver for sparse matrices, where A @ x = b. | ||||
| 
 | ||||
|     Args: | ||||
|         A: Matrix to solve (CSR format) | ||||
|         b: Right-hand side vector (dense ndarray) | ||||
|         max_iters: Maximum number of iterations | ||||
|         err_threshold: Error threshold for successful solve, relative to norm(b) | ||||
|         context: PyOpenCL context. Will be created if not given. | ||||
|         queue: PyOpenCL command queue. Will be created if not given. | ||||
| 
 | ||||
|     Returns: | ||||
|         Solution vector x; returned even if solve doesn't converge. | ||||
|     :param A: Matrix to solve (CSR format) | ||||
|     :param b: Right-hand side vector (dense ndarray) | ||||
|     :param max_iters: Maximum number of iterations | ||||
|     :param err_threshold: Error threshold for successful solve, relative to norm(b) | ||||
|     :param context: PyOpenCL context. Will be created if not given. | ||||
|     :param queue: PyOpenCL command queue. Will be created if not given. | ||||
|     :param verbose: Whether to print statistics to screen. | ||||
|     :return: Solution vector x; returned even if solve doesn't converge. | ||||
|     """ | ||||
| 
 | ||||
|     start_time = time.perf_counter() | ||||
| @ -84,10 +72,10 @@ def cg( | ||||
|     if queue is None: | ||||
|         queue = pyopencl.CommandQueue(context) | ||||
| 
 | ||||
|     def load_field(v: NDArray[numpy.complexfloating], dtype: type = numpy.complex128) -> pyopencl.array.Array: | ||||
|     def load_field(v, dtype=numpy.complex128): | ||||
|         return pyopencl.array.to_device(queue, v.astype(dtype)) | ||||
| 
 | ||||
|     r = load_field(numpy.asarray(b)) | ||||
|     r = load_field(b) | ||||
|     x = pyopencl.array.zeros_like(r) | ||||
|     v = pyopencl.array.zeros_like(r) | ||||
|     p = pyopencl.array.zeros_like(r) | ||||
| @ -98,27 +86,29 @@ def cg( | ||||
| 
 | ||||
|     m = CSRMatrix(queue, A) | ||||
| 
 | ||||
|     # | ||||
|     # Generate OpenCL kernels | ||||
|     # | ||||
|     ''' | ||||
|     Generate OpenCL kernels | ||||
|     ''' | ||||
|     a_step = ops.create_a_csr(context) | ||||
|     xr_step = ops.create_xr_step(context) | ||||
|     rhoerr_step = ops.create_rhoerr_step(context) | ||||
|     p_step = ops.create_p_step(context) | ||||
|     dot = ops.create_dot(context) | ||||
| 
 | ||||
|     # | ||||
|     # Start the solve | ||||
|     # | ||||
|     ''' | ||||
|     Start the solve | ||||
|     ''' | ||||
|     start_time2 = time.perf_counter() | ||||
| 
 | ||||
|     _, err2 = rhoerr_step(r, []) | ||||
|     b_norm = numpy.sqrt(err2) | ||||
|     logging.debug(f'b_norm check: {b_norm}') | ||||
|     if verbose: | ||||
|         print('b_norm check: ', b_norm) | ||||
| 
 | ||||
|     success = False | ||||
|     for k in range(max_iters): | ||||
|         logging.debug(f'[{k:06d}] rho {rho:.4} alpha {alpha:4.4}') | ||||
|         if verbose: | ||||
|             print('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha), end=' ') | ||||
| 
 | ||||
|         rho_prev = rho | ||||
|         e = xr_step(x, p, r, v, alpha, []) | ||||
| @ -126,7 +116,8 @@ def cg( | ||||
| 
 | ||||
|         errs += [numpy.sqrt(err2) / b_norm] | ||||
| 
 | ||||
|         logging.debug(f'err {errs[-1]}') | ||||
|         if verbose: | ||||
|             print('err', errs[-1]) | ||||
| 
 | ||||
|         if errs[-1] < err_threshold: | ||||
|             success = True | ||||
| @ -136,60 +127,53 @@ def cg( | ||||
|         e = a_step(v, m, p, e) | ||||
|         alpha = rho / dot(p, v, e) | ||||
| 
 | ||||
|         if k % 1000 == 0: | ||||
|             logger.info(f'iteration {k}') | ||||
|         if verbose and k % 1000 == 0: | ||||
|             print(k) | ||||
| 
 | ||||
|     # | ||||
|     # Done solving | ||||
|     # | ||||
|     ''' | ||||
|     Done solving | ||||
|     ''' | ||||
|     time_elapsed = time.perf_counter() - start_time | ||||
| 
 | ||||
|     x = x.get() | ||||
| 
 | ||||
|     if success: | ||||
|         logging.info('Solve success') | ||||
|     else: | ||||
|         logging.warning('Solve failure') | ||||
|     logging.info(f'{k} iterations in {time_elapsed} sec: {k / time_elapsed} iterations/sec') | ||||
|     logging.debug(f'final error {errs[-1]}') | ||||
|     logging.debug(f'overhead {start_time2 - start_time} sec') | ||||
|     if verbose: | ||||
|         if success: | ||||
|             print('Success', end='') | ||||
|         else: | ||||
|             print('Failure', end=', ') | ||||
|         print(', {} iterations in {} sec: {} iterations/sec \ | ||||
|                       '.format(k, time_elapsed, k / time_elapsed)) | ||||
|         print('final error', errs[-1]) | ||||
|         print('overhead {} sec'.format(start_time2 - start_time)) | ||||
| 
 | ||||
|     residual = norm(A @ x - b) / norm(b) | ||||
|     logging.info(f'Final residual: {residual}') | ||||
|         print('Final residual:', norm(A @ x - b) / norm(b)) | ||||
|     return x | ||||
| 
 | ||||
| 
 | ||||
| def fdfd_cg_solver( | ||||
|         solver_opts: dict[str, Any] | None = None, | ||||
|         **fdfd_args, | ||||
|         ) -> NDArray[complexfloating]: | ||||
| def fdfd_cg_solver(solver_opts: Dict[str, Any] = None, | ||||
|                    **fdfd_args | ||||
|                    ) -> numpy.ndarray: | ||||
|     """ | ||||
|     Conjugate gradient FDFD solver using CSR sparse matrices, mainly for | ||||
|      testing and development since it's much slower than the solver in main.py. | ||||
| 
 | ||||
|     Calls meanas.fdfd.solvers.generic( | ||||
|         **fdfd_args, | ||||
|         matrix_solver=opencl_fdfd.csr.cg, | ||||
|         matrix_solver_opts=solver_opts, | ||||
|         ) | ||||
|     Calls fdfd_tools.solvers.generic(**fdfd_args, | ||||
|                                      matrix_solver=opencl_fdfd.csr.cg, | ||||
|                                      matrix_solver_opts=solver_opts) | ||||
| 
 | ||||
|     Args: | ||||
|         solver_opts: Passed as matrix_solver_opts to fdfd_tools.solver.generic(...). | ||||
|             Default {}. | ||||
|         fdfd_args: Passed as **fdfd_args to fdfd_tools.solver.generic(...). | ||||
|             Should include all of the arguments **except** matrix_solver and matrix_solver_opts | ||||
| 
 | ||||
|     Returns: | ||||
|         E-field which solves the system. | ||||
|     :param solver_opts: Passed as matrix_solver_opts to fdfd_tools.solver.generic(...). | ||||
|         Default {}. | ||||
|     :param fdfd_args: Passed as **fdfd_args to fdfd_tools.solver.generic(...). | ||||
|         Should include all of the arguments **except** matrix_solver and matrix_solver_opts | ||||
|     :return: E-field which solves the system. | ||||
|     """ | ||||
| 
 | ||||
|     if solver_opts is None: | ||||
|         solver_opts = dict() | ||||
| 
 | ||||
|     x = meanas.fdfd.solvers.generic( | ||||
|         matrix_solver=cg, | ||||
|         matrix_solver_opts=solver_opts, | ||||
|         **fdfd_args, | ||||
|         ) | ||||
|     x = fdfd_tools.solvers.generic(matrix_solver=cg, | ||||
|                                    matrix_solver_opts=solver_opts, | ||||
|                                    **fdfd_args) | ||||
| 
 | ||||
|     return x | ||||
|  | ||||
| @ -31,7 +31,7 @@ __global char *pmc_z = pmc + ZZ; | ||||
| 
 | ||||
| 
 | ||||
| //Update H components; set them to 0 if PMC is enabled at that location. | ||||
| //Mu division and PMC conditional are only included if {mu} and {pmc} are true | ||||
| //Mu division and PMC conditional are only included if {{mu}} and {{pmc}} are true | ||||
| {% if pmc -%} | ||||
| if (pmc_x[i] != 0) { | ||||
|     Hx[i] = zero; | ||||
| @ -42,9 +42,9 @@ if (pmc_x[i] != 0) { | ||||
|     ctype Dyz = mul(sub(Ey[i + pz], Ey[i]), inv_dez[z]); | ||||
|     ctype x_curl = sub(Dzy, Dyz); | ||||
| 
 | ||||
|     {%- if mu %} | ||||
|     {%- if mu -%} | ||||
|     Hx[i] = mul(inv_mu_x[i], x_curl); | ||||
|     {%- else %} | ||||
|     {%- else -%} | ||||
|     Hx[i] = x_curl; | ||||
|     {%- endif %} | ||||
| } | ||||
| @ -59,9 +59,9 @@ if (pmc_y[i] != 0) { | ||||
|     ctype Dzx = mul(sub(Ez[i + px], Ez[i]), inv_dex[x]); | ||||
|     ctype y_curl = sub(Dxz, Dzx); | ||||
| 
 | ||||
|     {%- if mu %} | ||||
|     {%- if mu -%} | ||||
|     Hy[i] = mul(inv_mu_y[i], y_curl); | ||||
|     {%- else %} | ||||
|     {%- else -%} | ||||
|     Hy[i] = y_curl; | ||||
|     {%- endif %} | ||||
| } | ||||
| @ -76,9 +76,9 @@ if (pmc_z[i] != 0) { | ||||
|     ctype Dxy = mul(sub(Ex[i + py], Ex[i]), inv_dey[y]); | ||||
|     ctype z_curl = sub(Dyx, Dxy); | ||||
| 
 | ||||
|     {%- if mu %} | ||||
|     {%- if mu -%} | ||||
|     Hz[i] = mul(inv_mu_z[i], z_curl); | ||||
|     {%- else %} | ||||
|     {%- else -%} | ||||
|     Hz[i] = z_curl; | ||||
|     {%- endif %} | ||||
| } | ||||
|  | ||||
| @ -5,70 +5,67 @@ This file holds the default FDFD solver, which uses an E-field wave | ||||
| operator implemented directly as OpenCL arithmetic (rather than as | ||||
| a matrix). | ||||
| """ | ||||
| 
 | ||||
| from typing import List | ||||
| import time | ||||
| import logging | ||||
| 
 | ||||
| import numpy | ||||
| from numpy.typing import NDArray, ArrayLike | ||||
| from numpy.linalg import norm | ||||
| from numpy import floating, complexfloating | ||||
| import pyopencl | ||||
| import pyopencl.array | ||||
| 
 | ||||
| import meanas.fdfd.operators | ||||
| import fdfd_tools.operators | ||||
| 
 | ||||
| from . import ops | ||||
| 
 | ||||
| 
 | ||||
| logger = logging.getLogger(__name__) | ||||
| __author__ = 'Jan Petykiewicz' | ||||
| 
 | ||||
| 
 | ||||
| def cg_solver( | ||||
|         omega: complex, | ||||
|         dxes: list[list[NDArray[floating | complexfloating]]], | ||||
|         J: ArrayLike, | ||||
|         epsilon: ArrayLike, | ||||
|         mu: ArrayLike | None = None, | ||||
|         pec: ArrayLike | None = None, | ||||
|         pmc: ArrayLike | None = None, | ||||
|         adjoint: bool = False, | ||||
|         max_iters: int = 40000, | ||||
|         err_threshold: float = 1e-6, | ||||
|         context: pyopencl.Context | None = None, | ||||
|         ) -> NDArray: | ||||
| def cg_solver(omega: complex, | ||||
|               dxes: List[List[numpy.ndarray]], | ||||
|               J: numpy.ndarray, | ||||
|               epsilon: numpy.ndarray, | ||||
|               mu: numpy.ndarray = None, | ||||
|               pec: numpy.ndarray = None, | ||||
|               pmc: numpy.ndarray = None, | ||||
|               adjoint: bool = False, | ||||
|               max_iters: int = 40000, | ||||
|               err_threshold: float = 1e-6, | ||||
|               context: pyopencl.Context = None, | ||||
|               verbose: bool = False, | ||||
|               ) -> numpy.ndarray: | ||||
|     """ | ||||
|     OpenCL FDFD solver using the iterative conjugate gradient (cg) method | ||||
|      and implementing the diagonalized E-field wave operator directly in | ||||
|      OpenCL. | ||||
| 
 | ||||
|     All ndarray arguments should be 1D arrays. To linearize a list of 3 3D ndarrays, | ||||
|      either use meanas.fdmath.vec() or numpy: | ||||
|      either use fdfd_tools.vec() or numpy: | ||||
|      f_1D = numpy.hstack(tuple((fi.flatten(order='F') for fi in [f_x, f_y, f_z]))) | ||||
| 
 | ||||
|     Args: | ||||
|         omega: Complex frequency to solve at. | ||||
|         dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes) | ||||
|         J: Electric current distribution (at E-field locations) | ||||
|         epsilon: Dielectric constant distribution (at E-field locations) | ||||
|         mu: Magnetic permeability distribution (at H-field locations) | ||||
|         pec: Perfect electric conductor distribution | ||||
|             (at E-field locations; non-zero value indicates PEC is present) | ||||
|         pmc: Perfect magnetic conductor distribution | ||||
|             (at H-field locations; non-zero value indicates PMC is present) | ||||
|         adjoint: If true, solves the adjoint problem. | ||||
|         max_iters: Maximum number of iterations. Default 40,000. | ||||
|         err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success. | ||||
|             Default 1e-6. | ||||
|         context: PyOpenCL context to run in. If not given, construct a new context. | ||||
| 
 | ||||
|     Returns: | ||||
|         E-field which solves the system. Returned even if we did not converge. | ||||
|     :param omega: Complex frequency to solve at. | ||||
|     :param dxes: [[dx_e, dy_e, dz_e], [dx_h, dy_h, dz_h]] (complex cell sizes) | ||||
|     :param J: Electric current distribution (at E-field locations) | ||||
|     :param epsilon: Dielectric constant distribution (at E-field locations) | ||||
|     :param mu: Magnetic permeability distribution (at H-field locations) | ||||
|     :param pec: Perfect electric conductor distribution | ||||
|         (at E-field locations; non-zero value indicates PEC is present) | ||||
|     :param pmc: Perfect magnetic conductor distribution | ||||
|         (at H-field locations; non-zero value indicates PMC is present) | ||||
|     :param adjoint: If true, solves the adjoint problem. | ||||
|     :param max_iters: Maximum number of iterations. Default 40,000. | ||||
|     :param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success. | ||||
|         Default 1e-6. | ||||
|     :param context: PyOpenCL context to run in. If not given, construct a new context. | ||||
|     :param verbose: If True, print progress to stdout. Default False. | ||||
|     :return: E-field which solves the system. Returned even if we did not converge. | ||||
|     """ | ||||
| 
 | ||||
|     start_time = time.perf_counter() | ||||
| 
 | ||||
|     shape = [dd.size for dd in dxes[0]] | ||||
|     b = -1j * omega * J | ||||
| 
 | ||||
|     b = -1j * omega * numpy.asarray(J) | ||||
|     shape = [d.size for d in dxes[0]] | ||||
| 
 | ||||
|     ''' | ||||
|         ** In this comment, I use the following notation: | ||||
| @ -97,29 +94,30 @@ def cg_solver( | ||||
|         We can accomplish all this simply by conjugating everything (except J) and | ||||
|          reversing the order of L and R | ||||
|     ''' | ||||
|     epsilon = numpy.asarray(epsilon) | ||||
| 
 | ||||
|     if adjoint: | ||||
|         # Conjugate everything | ||||
|         dxes = [[numpy.conj(dd) for dd in dds] for dds in dxes] | ||||
|         dxes = [[numpy.conj(d) for d in dd] for dd in dxes] | ||||
|         omega = numpy.conj(omega) | ||||
|         epsilon = numpy.conj(epsilon) | ||||
|         if mu is not None: | ||||
|             mu = numpy.conj(mu) | ||||
|     assert isinstance(epsilon, NDArray[floating] | NDArray[complexfloating]) | ||||
| 
 | ||||
|     L, R = meanas.fdfd.operators.e_full_preconditioners(dxes) | ||||
|     b_preconditioned = (R if adjoint else L) @ b | ||||
|     L, R = fdfd_tools.operators.e_full_preconditioners(dxes) | ||||
| 
 | ||||
|     # | ||||
|     # Allocate GPU memory and load in data | ||||
|     # | ||||
|     if adjoint: | ||||
|         b_preconditioned = R @ b | ||||
|     else: | ||||
|         b_preconditioned = L @ b | ||||
| 
 | ||||
|     ''' | ||||
|         Allocate GPU memory and load in data | ||||
|     ''' | ||||
|     if context is None: | ||||
|         context = pyopencl.create_some_context(interactive=True) | ||||
| 
 | ||||
|     queue = pyopencl.CommandQueue(context) | ||||
| 
 | ||||
|     def load_field(v: NDArray[complexfloating | floating], dtype: type = numpy.complex128) -> pyopencl.array.Array: | ||||
|     def load_field(v, dtype=numpy.complex128): | ||||
|         return pyopencl.array.to_device(queue, v.astype(dtype)) | ||||
| 
 | ||||
|     r = load_field(b_preconditioned)  # load preconditioned b into r | ||||
| @ -132,31 +130,30 @@ def cg_solver( | ||||
|     rho = 1.0 + 0j | ||||
|     errs = [] | ||||
| 
 | ||||
|     inv_dxes = [[load_field(1 / numpy.asarray(dd)) for dd in dds] for dds in dxes] | ||||
|     oeps = load_field(-omega * omega * epsilon) | ||||
|     inv_dxes = [[load_field(1 / d) for d in dd] for dd in dxes] | ||||
|     oeps = load_field(-omega ** 2 * epsilon) | ||||
|     Pl = load_field(L.diagonal()) | ||||
|     Pr = load_field(R.diagonal()) | ||||
| 
 | ||||
|     if mu is None: | ||||
|         invm = load_field(numpy.array([])) | ||||
|     else: | ||||
|         invm = load_field(1 / numpy.asarray(mu)) | ||||
|         mu = numpy.asarray(mu) | ||||
|         invm = load_field(1 / mu) | ||||
| 
 | ||||
|     if pec is None: | ||||
|         gpec = load_field(numpy.array([]), dtype=numpy.int8) | ||||
|     else: | ||||
|         gpec = load_field(numpy.asarray(pec, dtype=bool), dtype=numpy.int8) | ||||
|         gpec = load_field(pec.astype(bool), dtype=numpy.int8) | ||||
| 
 | ||||
|     if pmc is None: | ||||
|         gpmc = load_field(numpy.array([]), dtype=numpy.int8) | ||||
|     else: | ||||
|         gpmc = load_field(numpy.asarray(pmc, dtype=bool), dtype=numpy.int8) | ||||
|         gpmc = load_field(pmc.astype(bool), dtype=numpy.int8) | ||||
| 
 | ||||
|     # | ||||
|     # Generate OpenCL kernels | ||||
|     # | ||||
|     has_mu, has_pec, has_pmc = (qq is not None for qq in (mu, pec, pmc)) | ||||
|     ''' | ||||
|     Generate OpenCL kernels | ||||
|     ''' | ||||
|     has_mu, has_pec, has_pmc = [q is not None for q in (mu, pec, pmc)] | ||||
| 
 | ||||
|     a_step_full = ops.create_a(context, shape, has_mu, has_pec, has_pmc) | ||||
|     xr_step = ops.create_xr_step(context) | ||||
| @ -164,28 +161,22 @@ def cg_solver( | ||||
|     p_step = ops.create_p_step(context) | ||||
|     dot = ops.create_dot(context) | ||||
| 
 | ||||
|     def a_step( | ||||
|             E: pyopencl.array.Array, | ||||
|             H: pyopencl.array.Array, | ||||
|             p: pyopencl.array.Array, | ||||
|             events: list[pyopencl.Event], | ||||
|             ) -> list[pyopencl.Event]: | ||||
|     def a_step(E, H, p, events): | ||||
|         return a_step_full(E, H, p, inv_dxes, oeps, invm, gpec, gpmc, Pl, Pr, events) | ||||
| 
 | ||||
|     # | ||||
|     # Start the solve | ||||
|     # | ||||
|     ''' | ||||
|     Start the solve | ||||
|     ''' | ||||
|     start_time2 = time.perf_counter() | ||||
| 
 | ||||
|     _, err2 = rhoerr_step(r, []) | ||||
|     b_norm = numpy.sqrt(err2) | ||||
|     logging.debug(f'b_norm check: {b_norm}') | ||||
|     print('b_norm check: ', b_norm) | ||||
| 
 | ||||
|     success = False | ||||
|     for k in range(max_iters): | ||||
|         do_print = (k % 100 == 0) | ||||
|         if do_print: | ||||
|             logger.debug(f'[{k:06d}] rho {rho:.4} alpha {alpha:4.4}') | ||||
|         if verbose: | ||||
|             print('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha), end=' ') | ||||
| 
 | ||||
|         rho_prev = rho | ||||
|         e = xr_step(x, p, r, v, alpha, []) | ||||
| @ -193,8 +184,8 @@ def cg_solver( | ||||
| 
 | ||||
|         errs += [numpy.sqrt(err2) / b_norm] | ||||
| 
 | ||||
|         if do_print: | ||||
|             logger.debug(f'err {errs[-1]}') | ||||
|         if verbose: | ||||
|             print('err', errs[-1]) | ||||
| 
 | ||||
|         if errs[-1] < err_threshold: | ||||
|             success = True | ||||
| @ -205,30 +196,32 @@ def cg_solver( | ||||
|         alpha = rho / dot(p, v, e) | ||||
| 
 | ||||
|         if k % 1000 == 0: | ||||
|             logger.info(f'iteration {k}') | ||||
|             print(k) | ||||
| 
 | ||||
|     # | ||||
|     # Done solving | ||||
|     # | ||||
|     ''' | ||||
|     Done solving | ||||
|     ''' | ||||
|     time_elapsed = time.perf_counter() - start_time | ||||
| 
 | ||||
|     # Undo preconditioners | ||||
|     x = ((Pl if adjoint else Pr) * x).get() | ||||
|     if adjoint: | ||||
|         x = (Pl * x).get() | ||||
|     else: | ||||
|         x = (Pr * x).get() | ||||
| 
 | ||||
|     if success: | ||||
|         logger.info('Solve success') | ||||
|         print('Success', end='') | ||||
|     else: | ||||
|         logger.warning('Solve failure') | ||||
|     logger.info(f'{k} iterations in {time_elapsed} sec: {k / time_elapsed} iterations/sec') | ||||
|     logger.debug(f'final error {errs[-1]}') | ||||
|     logger.debug(f'overhead {start_time2 - start_time} sec') | ||||
|         print('Failure', end=', ') | ||||
|     print(', {} iterations in {} sec: {} iterations/sec \ | ||||
|                   '.format(k, time_elapsed, k / time_elapsed)) | ||||
|     print('final error', errs[-1]) | ||||
|     print('overhead {} sec'.format(start_time2 - start_time)) | ||||
| 
 | ||||
|     A0 = meanas.fdfd.operators.e_full(omega, dxes, epsilon, mu).tocsr() | ||||
|     A0 = fdfd_tools.operators.e_full(omega, dxes, epsilon, mu).tocsr() | ||||
|     if adjoint: | ||||
|         # Remember we conjugated all the contents of A earlier | ||||
|         A0 = A0.T | ||||
| 
 | ||||
|     residual = norm(A0 @ x - b) / norm(b) | ||||
|     logger.info(f'Post-everything residual: {residual}') | ||||
|     print('Post-everything residual:', norm(A0 @ x - b) / norm(b)) | ||||
|     return x | ||||
| 
 | ||||
|  | ||||
| @ -7,11 +7,9 @@ kernels for use by the other solvers. | ||||
| See kernels/ for any of the .cl files loaded in this file. | ||||
| """ | ||||
| 
 | ||||
| from collections.abc import Callable, Sequence | ||||
| import logging | ||||
| from typing import List, Callable | ||||
| 
 | ||||
| import numpy | ||||
| from numpy.typing import ArrayLike | ||||
| import jinja2 | ||||
| 
 | ||||
| import pyopencl | ||||
| @ -20,77 +18,59 @@ from pyopencl.elementwise import ElementwiseKernel | ||||
| from pyopencl.reduction import ReductionKernel | ||||
| 
 | ||||
| 
 | ||||
| from .csr import CSRMatrix | ||||
| 
 | ||||
| 
 | ||||
| logger = logging.getLogger(__name__) | ||||
| 
 | ||||
| 
 | ||||
| class FDFDError(Exception): | ||||
|     """ Custom error for opencl_fdfd """ | ||||
|     pass | ||||
| 
 | ||||
| # Create jinja2 env on module load | ||||
| jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) | ||||
| 
 | ||||
| # Return type for the create_opname(...) functions | ||||
| operation = Callable[..., list[pyopencl.Event]] | ||||
| operation = Callable[..., List[pyopencl.Event]] | ||||
| 
 | ||||
| 
 | ||||
| def type_to_C( | ||||
|         float_type: type[numpy.floating | numpy.complexfloating], | ||||
|         ) -> str: | ||||
| def type_to_C(float_type: numpy.float32 or numpy.float64) -> str: | ||||
|     """ | ||||
|     Returns a string corresponding to the C equivalent of a numpy type. | ||||
| 
 | ||||
|     Args: | ||||
|         float_type: numpy type: float32, float64, complex64, complex128 | ||||
| 
 | ||||
|     Returns: | ||||
|         string containing the corresponding C type (eg. 'double') | ||||
|     :param float_type: numpy type: float32, float64, complex64, complex128 | ||||
|     :return: string containing the corresponding C type (eg. 'double') | ||||
|     """ | ||||
|     types = { | ||||
|         numpy.float16: 'half', | ||||
|         numpy.float32: 'float', | ||||
|         numpy.float64: 'double', | ||||
|         numpy.complex64: 'cfloat_t', | ||||
|         numpy.complex128: 'cdouble_t', | ||||
|     } | ||||
|     if float_type not in types: | ||||
|         raise FDFDError('Unsupported type') | ||||
|         raise Exception('Unsupported type') | ||||
| 
 | ||||
|     return types[float_type] | ||||
| 
 | ||||
| 
 | ||||
| # Type names | ||||
| ctype = type_to_C(numpy.complex128) | ||||
| ctype_bare = 'cdouble' | ||||
| 
 | ||||
| # Preamble for all OpenCL code | ||||
| preamble = f''' | ||||
| preamble = ''' | ||||
| #define PYOPENCL_DEFINE_CDOUBLE | ||||
| #include <pyopencl-complex.h> | ||||
| 
 | ||||
| //Defines to clean up operation and type names | ||||
| #define ctype {ctype_bare}_t | ||||
| #define zero {ctype_bare}_new(0.0, 0.0) | ||||
| #define add {ctype_bare}_add | ||||
| #define sub {ctype_bare}_sub | ||||
| #define mul {ctype_bare}_mul | ||||
| ''' | ||||
| #define ctype {ctype}_t | ||||
| #define zero {ctype}_new(0.0, 0.0) | ||||
| #define add {ctype}_add | ||||
| #define sub {ctype}_sub | ||||
| #define mul {ctype}_mul | ||||
| '''.format(ctype=ctype_bare) | ||||
| 
 | ||||
| 
 | ||||
| def ptrs(*args: str) -> list[str]: | ||||
| def ptrs(*args: str) -> List[str]: | ||||
|     return [ctype + ' *' + s for s in args] | ||||
| 
 | ||||
| 
 | ||||
| def create_a( | ||||
|         context: pyopencl.Context, | ||||
|         shape: ArrayLike, | ||||
|         mu: bool = False, | ||||
|         pec: bool = False, | ||||
|         pmc: bool = False, | ||||
|         ) -> operation: | ||||
| def create_a(context: pyopencl.Context, | ||||
|              shape: numpy.ndarray, | ||||
|              mu: bool = False, | ||||
|              pec: bool = False, | ||||
|              pmc: bool = False, | ||||
|              ) -> operation: | ||||
|     """ | ||||
|     Return a function which performs (A @ p), where A is the FDFD wave equation for E-field. | ||||
| 
 | ||||
| @ -111,15 +91,12 @@ def create_a( | ||||
| 
 | ||||
|      and returns a list of pyopencl.Event. | ||||
| 
 | ||||
|     Args: | ||||
|         context: PyOpenCL context | ||||
|         shape: Dimensions of the E-field | ||||
|         mu: False iff (mu == 1) everywhere | ||||
|         pec: False iff no PEC anywhere | ||||
|         pmc: False iff no PMC anywhere | ||||
| 
 | ||||
|     Returns: | ||||
|         Function for computing (A @ p) | ||||
|     :param context: PyOpenCL context | ||||
|     :param shape: Dimensions of the E-field | ||||
|     :param mu: False iff (mu == 1) everywhere | ||||
|     :param pec: False iff no PEC anywhere | ||||
|     :param pmc: False iff no PMC anywhere | ||||
|     :return: Function for computing (A @ p) | ||||
|     """ | ||||
| 
 | ||||
|     common_source = jinja_env.get_template('common.cl').render(shape=shape) | ||||
| @ -129,72 +106,45 @@ def create_a( | ||||
|     des = [ctype + ' *inv_de' + a for a in 'xyz'] | ||||
|     dhs = [ctype + ' *inv_dh' + a for a in 'xyz'] | ||||
| 
 | ||||
|     # | ||||
|     # Convert p to initial E (ie, apply right preconditioner and PEC) | ||||
|     # | ||||
|     ''' | ||||
|     Convert p to initial E (ie, apply right preconditioner and PEC) | ||||
|     ''' | ||||
|     p2e_source = jinja_env.get_template('p2e.cl').render(pec=pec) | ||||
|     P2E_kernel = ElementwiseKernel( | ||||
|         context, | ||||
|         name='P2E', | ||||
|         preamble=preamble, | ||||
|         operation=p2e_source, | ||||
|         arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg), | ||||
|         ) | ||||
|     P2E_kernel = ElementwiseKernel(context, | ||||
|                                    name='P2E', | ||||
|                                    preamble=preamble, | ||||
|                                    operation=p2e_source, | ||||
|                                    arguments=', '.join(ptrs('E', 'p', 'Pr') + pec_arg)) | ||||
| 
 | ||||
|     # | ||||
|     # Calculate intermediate H from intermediate E | ||||
|     # | ||||
|     e2h_source = jinja_env.get_template('e2h.cl').render( | ||||
|         mu=mu, | ||||
|         pmc=pmc, | ||||
|         common_cl=common_source, | ||||
|         ) | ||||
|     E2H_kernel = ElementwiseKernel( | ||||
|         context, | ||||
|         name='E2H', | ||||
|         preamble=preamble, | ||||
|         operation=e2h_source, | ||||
|         arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des), | ||||
|         ) | ||||
|     ''' | ||||
|     Calculate intermediate H from intermediate E | ||||
|     ''' | ||||
|     e2h_source = jinja_env.get_template('e2h.cl').render(mu=mu, | ||||
|                                                          pmc=pmc, | ||||
|                                                          common_cl=common_source) | ||||
|     E2H_kernel = ElementwiseKernel(context, | ||||
|                                    name='E2H', | ||||
|                                    preamble=preamble, | ||||
|                                    operation=e2h_source, | ||||
|                                    arguments=', '.join(ptrs('E', 'H', 'inv_mu') + pmc_arg + des)) | ||||
| 
 | ||||
|     # | ||||
|     # Calculate final E (including left preconditioner) | ||||
|     # | ||||
|     h2e_source = jinja_env.get_template('h2e.cl').render( | ||||
|         pec=pec, | ||||
|         common_cl=common_source, | ||||
|         ) | ||||
|     H2E_kernel = ElementwiseKernel( | ||||
|         context, | ||||
|         name='H2E', | ||||
|         preamble=preamble, | ||||
|         operation=h2e_source, | ||||
|         arguments=', '.join(ptrs('E', 'H', 'oeps', 'Pl') + pec_arg + dhs), | ||||
|         ) | ||||
|     ''' | ||||
|     Calculate final E (including left preconditioner) | ||||
|     ''' | ||||
|     h2e_source = jinja_env.get_template('h2e.cl').render(pec=pec, | ||||
|                                                          common_cl=common_source) | ||||
|     H2E_kernel = ElementwiseKernel(context, | ||||
|                                    name='H2E', | ||||
|                                    preamble=preamble, | ||||
|                                    operation=h2e_source, | ||||
|                                    arguments=', '.join(ptrs('E', 'H', 'oeps', 'Pl') + pec_arg + dhs)) | ||||
| 
 | ||||
|     def spmv( | ||||
|             E: pyopencl.array.Array, | ||||
|             H: pyopencl.array.Array, | ||||
|             p: pyopencl.array.Array, | ||||
|             idxes: Sequence[Sequence[pyopencl.array.Array]], | ||||
|             oeps: pyopencl.array.Array, | ||||
|             inv_mu: pyopencl.array.Array | None, | ||||
|             pec: pyopencl.array.Array | None, | ||||
|             pmc: pyopencl.array.Array | None, | ||||
|             Pl: pyopencl.array.Array, | ||||
|             Pr: pyopencl.array.Array, | ||||
|             e: list[pyopencl.Event], | ||||
|             ) -> list[pyopencl.Event]: | ||||
|     def spmv(E, H, p, idxes, oeps, inv_mu, pec, pmc, Pl, Pr, e): | ||||
|         e2 = P2E_kernel(E, p, Pr, pec, wait_for=e) | ||||
|         e2 = E2H_kernel(E, H, inv_mu, pmc, *idxes[0], wait_for=[e2]) | ||||
|         e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2]) | ||||
|         return [e2] | ||||
| 
 | ||||
|     logger.debug(f'Preamble: \n{preamble}') | ||||
|     logger.debug(f'p2e: \n{p2e_source}') | ||||
|     logger.debug(f'e2h: \n{e2h_source}') | ||||
|     logger.debug(f'h2e: \n{h2e_source}') | ||||
| 
 | ||||
|     return spmv | ||||
| 
 | ||||
| 
 | ||||
| @ -209,11 +159,8 @@ def create_xr_step(context: pyopencl.Context) -> operation: | ||||
|     after waiting for all in the list e | ||||
|     and returns a list of pyopencl.Event | ||||
| 
 | ||||
|     Args: | ||||
|         context: PyOpenCL context | ||||
| 
 | ||||
|     Returns: | ||||
|         Function for performing x and r updates | ||||
|     :param context: PyOpenCL context | ||||
|     :return: Function for performing x and r updates | ||||
|     """ | ||||
|     update_xr_source = ''' | ||||
|     x[i] = add(x[i], mul(alpha, p[i])); | ||||
| @ -222,28 +169,19 @@ def create_xr_step(context: pyopencl.Context) -> operation: | ||||
| 
 | ||||
|     xr_args = ', '.join(ptrs('x', 'p', 'r', 'v') + [ctype + ' alpha']) | ||||
| 
 | ||||
|     xr_kernel = ElementwiseKernel( | ||||
|         context, | ||||
|         name='XR', | ||||
|         preamble=preamble, | ||||
|         operation=update_xr_source, | ||||
|         arguments=xr_args, | ||||
|         ) | ||||
|     xr_kernel = ElementwiseKernel(context, | ||||
|                                   name='XR', | ||||
|                                   preamble=preamble, | ||||
|                                   operation=update_xr_source, | ||||
|                                   arguments=xr_args) | ||||
| 
 | ||||
|     def xr_update( | ||||
|             x: pyopencl.array.Array, | ||||
|             p: pyopencl.array.Array, | ||||
|             r: pyopencl.array.Array, | ||||
|             v: pyopencl.array.Array, | ||||
|             alpha: complex, | ||||
|             e: list[pyopencl.Event], | ||||
|             ) -> list[pyopencl.Event]: | ||||
|     def xr_update(x, p, r, v, alpha, e): | ||||
|         return [xr_kernel(x, p, r, v, alpha, wait_for=e)] | ||||
| 
 | ||||
|     return xr_update | ||||
| 
 | ||||
| 
 | ||||
| def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex, complex]]: | ||||
| def create_rhoerr_step(context: pyopencl.Context) -> operation: | ||||
|     """ | ||||
|     Return a function | ||||
|      ri_update(r, e) | ||||
| @ -254,11 +192,8 @@ def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex | ||||
|     after waiting for all pyopencl.Event in the list e | ||||
|     and returns a list of pyopencl.Event | ||||
| 
 | ||||
|     Args: | ||||
|         context: PyOpenCL context | ||||
| 
 | ||||
|     Returns: | ||||
|         Function for performing x and r updates | ||||
|     :param context: PyOpenCL context | ||||
|     :return: Function for performing x and r updates | ||||
|     """ | ||||
| 
 | ||||
|     update_ri_source = ''' | ||||
| @ -270,20 +205,18 @@ def create_rhoerr_step(context: pyopencl.Context) -> Callable[..., tuple[complex | ||||
|     # Use a vector type (double3) to make the reduction simpler | ||||
|     ri_dtype = pyopencl.array.vec.double3 | ||||
| 
 | ||||
|     ri_kernel = ReductionKernel( | ||||
|         context, | ||||
|         name='RHOERR', | ||||
|         preamble=preamble, | ||||
|         dtype_out=ri_dtype, | ||||
|         neutral='(double3)(0.0, 0.0, 0.0)', | ||||
|         map_expr=update_ri_source, | ||||
|         reduce_expr='a+b', | ||||
|         arguments=ctype + ' *r', | ||||
|         ) | ||||
|     ri_kernel = ReductionKernel(context, | ||||
|                                 name='RHOERR', | ||||
|                                 preamble=preamble, | ||||
|                                 dtype_out=ri_dtype, | ||||
|                                 neutral='(double3)(0.0, 0.0, 0.0)', | ||||
|                                 map_expr=update_ri_source, | ||||
|                                 reduce_expr='a+b', | ||||
|                                 arguments=ctype + ' *r') | ||||
| 
 | ||||
|     def ri_update(r: pyopencl.array.Array, e: list[pyopencl.Event]) -> tuple[complex, complex]: | ||||
|     def ri_update(r, e): | ||||
|         g = ri_kernel(r, wait_for=e).astype(ri_dtype).get() | ||||
|         rr, ri, ii = (g[qq] for qq in 'xyz') | ||||
|         rr, ri, ii = [g[q] for q in 'xyz'] | ||||
|         rho = rr + 2j * ri - ii | ||||
|         err = rr + ii | ||||
|         return rho, err | ||||
| @ -301,66 +234,48 @@ def create_p_step(context: pyopencl.Context) -> operation: | ||||
|     after waiting for all pyopencl.Event in the list e | ||||
|     and returns a list of pyopencl.Event | ||||
| 
 | ||||
|     Args: | ||||
|         context: PyOpenCL context | ||||
| 
 | ||||
|     Returns: | ||||
|         Function for performing the p update | ||||
|     :param context: PyOpenCL context | ||||
|     :return: Function for performing the p update | ||||
|     """ | ||||
|     update_p_source = ''' | ||||
|     p[i] = add(r[i], mul(beta, p[i])); | ||||
|     ''' | ||||
|     p_args = ptrs('p', 'r') + [ctype + ' beta'] | ||||
| 
 | ||||
|     p_kernel = ElementwiseKernel( | ||||
|         context, | ||||
|         name='P', | ||||
|         preamble=preamble, | ||||
|         operation=update_p_source, | ||||
|         arguments=', '.join(p_args), | ||||
|         ) | ||||
|     p_kernel = ElementwiseKernel(context, | ||||
|                                  name='P', | ||||
|                                  preamble=preamble, | ||||
|                                  operation=update_p_source, | ||||
|                                  arguments=', '.join(p_args)) | ||||
| 
 | ||||
|     def p_update( | ||||
|             p: pyopencl.array.Array, | ||||
|             r: pyopencl.array.Array, | ||||
|             beta: complex, | ||||
|             e: list[pyopencl.Event]) -> list[pyopencl.Event]: | ||||
|     def p_update(p, r, beta, e): | ||||
|         return [p_kernel(p, r, beta, wait_for=e)] | ||||
| 
 | ||||
|     return p_update | ||||
| 
 | ||||
| 
 | ||||
| def create_dot(context: pyopencl.Context) -> Callable[..., complex]: | ||||
| def create_dot(context: pyopencl.Context) -> operation: | ||||
|     """ | ||||
|     Return a function for performing the dot product | ||||
|      p @ v | ||||
|     with the signature | ||||
|      dot(p, v, e) -> complex | ||||
|      dot(p, v, e) -> float | ||||
| 
 | ||||
|     Args: | ||||
|         context: PyOpenCL context | ||||
| 
 | ||||
|     Returns: | ||||
|         Function for performing the dot product | ||||
|     :param context: PyOpenCL context | ||||
|     :return: Function for performing the dot product | ||||
|     """ | ||||
|     dot_dtype = numpy.complex128 | ||||
| 
 | ||||
|     dot_kernel = ReductionKernel( | ||||
|         context, | ||||
|         name='dot', | ||||
|         preamble=preamble, | ||||
|         dtype_out=dot_dtype, | ||||
|         neutral='zero', | ||||
|         map_expr='mul(p[i], v[i])', | ||||
|         reduce_expr='add(a, b)', | ||||
|         arguments=ptrs('p', 'v'), | ||||
|         ) | ||||
|     dot_kernel = ReductionKernel(context, | ||||
|                                  name='dot', | ||||
|                                  preamble=preamble, | ||||
|                                  dtype_out=dot_dtype, | ||||
|                                  neutral='zero', | ||||
|                                  map_expr='mul(p[i], v[i])', | ||||
|                                  reduce_expr='add(a, b)', | ||||
|                                  arguments=ptrs('p', 'v')) | ||||
| 
 | ||||
|     def dot( | ||||
|             p: pyopencl.array.Array, | ||||
|             v: pyopencl.array.Array, | ||||
|             e: list[pyopencl.Event], | ||||
|             ) -> complex: | ||||
|     def dot(p, v, e): | ||||
|         g = dot_kernel(p, v, wait_for=e) | ||||
|         return g.get() | ||||
| 
 | ||||
| @ -381,11 +296,8 @@ def create_a_csr(context: pyopencl.Context) -> operation: | ||||
|     The function waits on all the pyopencl.Event in e before running, and returns | ||||
|      a list of pyopencl.Event. | ||||
| 
 | ||||
|     Args: | ||||
|         context: PyOpenCL context | ||||
| 
 | ||||
|     Returns: | ||||
|         Function for sparse (M @ v) operation where M is in CSR format | ||||
|     :param context: PyOpenCL context | ||||
|     :return: Function for sparse (M @ v) operation where M is in CSR format | ||||
|     """ | ||||
|     spmv_source = ''' | ||||
|     int start = m_row_ptr[i]; | ||||
| @ -406,20 +318,13 @@ def create_a_csr(context: pyopencl.Context) -> operation: | ||||
|     m_args = 'int *m_row_ptr, int *m_col_ind, ' + ctype + ' *m_data' | ||||
|     v_in_args = ctype + ' *v_in' | ||||
| 
 | ||||
|     spmv_kernel = ElementwiseKernel( | ||||
|         context, | ||||
|         name='csr_spmv', | ||||
|         preamble=preamble, | ||||
|         operation=spmv_source, | ||||
|         arguments=', '.join((v_out_args, m_args, v_in_args)), | ||||
|         ) | ||||
|     spmv_kernel = ElementwiseKernel(context, | ||||
|                                     name='csr_spmv', | ||||
|                                     preamble=preamble, | ||||
|                                     operation=spmv_source, | ||||
|                                     arguments=', '.join((v_out_args, m_args, v_in_args))) | ||||
| 
 | ||||
|     def spmv( | ||||
|             v_out: pyopencl.array.Array, | ||||
|             m: CSRMatrix, | ||||
|             v_in: pyopencl.array.Array, | ||||
|             e: list[pyopencl.Event], | ||||
|             ) -> list[pyopencl.Event]: | ||||
|     def spmv(v_out, m, v_in, e): | ||||
|         return [spmv_kernel(v_out, m.row_ptr, m.col_ind, m.data, v_in, wait_for=e)] | ||||
| 
 | ||||
|     return spmv | ||||
|  | ||||
| @ -1,96 +0,0 @@ | ||||
| [build-system] | ||||
| requires = ["hatchling"] | ||||
| build-backend = "hatchling.build" | ||||
| 
 | ||||
| [project] | ||||
| name = "opencl_fdfd" | ||||
| description = "OpenCL FDFD solver" | ||||
| readme = "README.md" | ||||
| license = { file = "LICENSE.md" } | ||||
| authors = [ | ||||
|     { name="Jan Petykiewicz", email="jan@mpxd.net" }, | ||||
|     ] | ||||
| homepage = "https://mpxd.net/code/jan/opencl_fdfd" | ||||
| repository = "https://mpxd.net/code/jan/opencl_fdfd" | ||||
| keywords = [ | ||||
|     "FDFD", | ||||
|     "finite", | ||||
|     "difference", | ||||
|     "frequency", | ||||
|     "domain", | ||||
|     "simulation", | ||||
|     "optics", | ||||
|     "electromagnetic", | ||||
|     "dielectric", | ||||
|     "PML", | ||||
|     "solver", | ||||
|     "FDTD", | ||||
|     ] | ||||
| classifiers = [ | ||||
|     "Programming Language :: Python :: 3", | ||||
|     "Development Status :: 4 - Beta", | ||||
|     "Intended Audience :: Developers", | ||||
|     "Intended Audience :: Manufacturing", | ||||
|     "Intended Audience :: Science/Research", | ||||
|     "License :: OSI Approved :: GNU Affero General Public License v3", | ||||
|     "Topic :: Scientific/Engineering", | ||||
|     ] | ||||
| requires-python = ">=3.11" | ||||
| dynamic = ["version"] | ||||
| dependencies = [ | ||||
|     "numpy>=1.26", | ||||
|     "pyopencl", | ||||
|     "jinja2", | ||||
|     "meanas>=0.5", | ||||
|     ] | ||||
| 
 | ||||
| [tool.hatch.version] | ||||
| path = "opencl_fdfd/__init__.py" | ||||
| 
 | ||||
| 
 | ||||
| [tool.ruff] | ||||
| exclude = [ | ||||
|     ".git", | ||||
|     "dist", | ||||
|     ] | ||||
| line-length = 145 | ||||
| indent-width = 4 | ||||
| lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" | ||||
| lint.select = [ | ||||
|     "NPY", "E", "F", "W", "B", "ANN", "UP", "SLOT", "SIM", "LOG", | ||||
|     "C4", "ISC", "PIE", "PT", "RET", "TCH", "PTH", "INT", | ||||
|     "ARG", "PL", "R", "TRY", | ||||
|     "G010", "G101", "G201", "G202", | ||||
|     "Q002", "Q003", "Q004", | ||||
|     ] | ||||
| lint.ignore = [ | ||||
|     #"ANN001",   # No annotation | ||||
|     "ANN002",   # *args | ||||
|     "ANN003",   # **kwargs | ||||
|     "ANN401",   # Any | ||||
|     "ANN101",   # self: Self | ||||
|     "SIM108",   # single-line if / else assignment | ||||
|     "RET504",   # x=y+z; return x | ||||
|     "PIE790",   # unnecessary pass | ||||
|     "ISC003",   # non-implicit string concatenation | ||||
|     "C408",     # dict(x=y) instead of {'x': y} | ||||
|     "PLR09",    # Too many xxx | ||||
|     "PLR2004",  # magic number | ||||
|     "PLC0414",  # import x as x | ||||
|     "TRY003",   # Long exception message | ||||
|     ] | ||||
| 
 | ||||
| 
 | ||||
| [[tool.mypy.overrides]] | ||||
| module = [ | ||||
|     "scipy", | ||||
|     "scipy.optimize", | ||||
|     "scipy.linalg", | ||||
|     "scipy.sparse", | ||||
|     "scipy.sparse.linalg", | ||||
|     "pyopencl", | ||||
|     "pyopencl.array", | ||||
|     "pyopencl.elementwise", | ||||
|     "pyopencl.reduction", | ||||
|     ] | ||||
| ignore_missing_imports = true | ||||
							
								
								
									
										24
									
								
								setup.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								setup.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,24 @@ | ||||
| #!/usr/bin/env python | ||||
| 
 | ||||
| from setuptools import setup, find_packages | ||||
| 
 | ||||
| setup(name='opencl_fdfd', | ||||
|       version='0.3', | ||||
|       description='OpenCL FDFD solver', | ||||
|       author='Jan Petykiewicz', | ||||
|       author_email='anewusername@gmail.com', | ||||
|       url='https://mpxd.net/gogs/jan/opencl_fdfd', | ||||
|       packages=find_packages(), | ||||
|       package_data={ | ||||
|           'opencl_fdfd': ['kernels/*'] | ||||
|       }, | ||||
|       install_requires=[ | ||||
|             'numpy', | ||||
|             'pyopencl', | ||||
|             'jinja2', | ||||
|             'fdfd_tools>=0.3', | ||||
|       ], | ||||
|       extras_require={ | ||||
|       }, | ||||
|       ) | ||||
| 
 | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user