Compare commits

..

1 Commits

Author SHA1 Message Date
jan 235e0b6365 fixup package info 7 years ago

@ -38,12 +38,12 @@ generalization to multiple GPUs should be pretty straightforward
* numpy * numpy
* pyopencl * pyopencl
* jinja2 * jinja2
* [fdfd_tools](https://mpxd.net/code/jan/fdfd_tools) (>=0.2) * [fdfd_tools](https://mpxd.net/gogs/jan/fdfd_tools) (>=0.2)
Install with pip, via git: Install with pip, via git:
```bash ```bash
pip install git+https://mpxd.net/code/jan/opencl_fdfd.git@release pip install git+https://mpxd.net/gogs/jan/opencl_fdfd.git@release
``` ```

@ -31,7 +31,7 @@
Dependencies: Dependencies:
- fdfd_tools ( https://mpxd.net/code/jan/fdfd_tools ) - fdfd_tools ( https://mpxd.net/gogs/jan/fdfd_tools )
- numpy - numpy
- pyopencl - pyopencl
- jinja2 - jinja2
@ -41,4 +41,3 @@ from .main import cg_solver
__author__ = 'Jan Petykiewicz' __author__ = 'Jan Petykiewicz'
version = '0.3'

@ -16,7 +16,6 @@ satisfy the constraints for the 'conjugate gradient' algorithm
from typing import Dict, Any from typing import Dict, Any
import time import time
import logging
import numpy import numpy
from numpy.linalg import norm from numpy.linalg import norm
@ -28,11 +27,6 @@ import fdfd_tools.solvers
from . import ops from . import ops
__author__ = 'Jan Petykiewicz'
logger = logging.getLogger(__name__)
class CSRMatrix(object): class CSRMatrix(object):
""" """
Matrix stored in Compressed Sparse Row format, in GPU RAM. Matrix stored in Compressed Sparse Row format, in GPU RAM.
@ -55,6 +49,7 @@ def cg(A: 'scipy.sparse.csr_matrix',
err_threshold: float = 1e-6, err_threshold: float = 1e-6,
context: pyopencl.Context = None, context: pyopencl.Context = None,
queue: pyopencl.CommandQueue = None, queue: pyopencl.CommandQueue = None,
verbose: bool = False,
) -> numpy.ndarray: ) -> numpy.ndarray:
""" """
General conjugate-gradient solver for sparse matrices, where A @ x = b. General conjugate-gradient solver for sparse matrices, where A @ x = b.
@ -65,6 +60,7 @@ def cg(A: 'scipy.sparse.csr_matrix',
:param err_threshold: Error threshold for successful solve, relative to norm(b) :param err_threshold: Error threshold for successful solve, relative to norm(b)
:param context: PyOpenCL context. Will be created if not given. :param context: PyOpenCL context. Will be created if not given.
:param queue: PyOpenCL command queue. Will be created if not given. :param queue: PyOpenCL command queue. Will be created if not given.
:param verbose: Whether to print statistics to screen.
:return: Solution vector x; returned even if solve doesn't converge. :return: Solution vector x; returned even if solve doesn't converge.
""" """
@ -106,11 +102,13 @@ def cg(A: 'scipy.sparse.csr_matrix',
_, err2 = rhoerr_step(r, []) _, err2 = rhoerr_step(r, [])
b_norm = numpy.sqrt(err2) b_norm = numpy.sqrt(err2)
logging.debug('b_norm check: ', b_norm) if verbose:
print('b_norm check: ', b_norm)
success = False success = False
for k in range(max_iters): for k in range(max_iters):
logging.debug('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha)) if verbose:
print('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha), end=' ')
rho_prev = rho rho_prev = rho
e = xr_step(x, p, r, v, alpha, []) e = xr_step(x, p, r, v, alpha, [])
@ -118,7 +116,8 @@ def cg(A: 'scipy.sparse.csr_matrix',
errs += [numpy.sqrt(err2) / b_norm] errs += [numpy.sqrt(err2) / b_norm]
logging.debug('err {}'.format(errs[-1])) if verbose:
print('err', errs[-1])
if errs[-1] < err_threshold: if errs[-1] < err_threshold:
success = True success = True
@ -129,7 +128,7 @@ def cg(A: 'scipy.sparse.csr_matrix',
alpha = rho / dot(p, v, e) alpha = rho / dot(p, v, e)
if verbose and k % 1000 == 0: if verbose and k % 1000 == 0:
logging.info('iteration {}'.format(k)) print(k)
''' '''
Done solving Done solving
@ -138,16 +137,17 @@ def cg(A: 'scipy.sparse.csr_matrix',
x = x.get() x = x.get()
if success: if verbose:
logging.info('Solve success') if success:
else: print('Success', end='')
logging.warning('Solve failure') else:
logging.info('{} iterations in {} sec: {} iterations/sec \ print('Failure', end=', ')
'.format(k, time_elapsed, k / time_elapsed)) print(', {} iterations in {} sec: {} iterations/sec \
logging.debug('final error {}'.format(errs[-1])) '.format(k, time_elapsed, k / time_elapsed))
logging.debug('overhead {} sec'.format(start_time2 - start_time)) print('final error', errs[-1])
print('overhead {} sec'.format(start_time2 - start_time))
logging.info('Final residual: {}'.format(norm(A @ x - b) / norm(b))) print('Final residual:', norm(A @ x - b) / norm(b))
return x return x

@ -31,7 +31,7 @@ __global char *pmc_z = pmc + ZZ;
//Update H components; set them to 0 if PMC is enabled at that location. //Update H components; set them to 0 if PMC is enabled at that location.
//Mu division and PMC conditional are only included if {mu} and {pmc} are true //Mu division and PMC conditional are only included if {{mu}} and {{pmc}} are true
{% if pmc -%} {% if pmc -%}
if (pmc_x[i] != 0) { if (pmc_x[i] != 0) {
Hx[i] = zero; Hx[i] = zero;
@ -42,9 +42,9 @@ if (pmc_x[i] != 0) {
ctype Dyz = mul(sub(Ey[i + pz], Ey[i]), inv_dez[z]); ctype Dyz = mul(sub(Ey[i + pz], Ey[i]), inv_dez[z]);
ctype x_curl = sub(Dzy, Dyz); ctype x_curl = sub(Dzy, Dyz);
{%- if mu %} {%- if mu -%}
Hx[i] = mul(inv_mu_x[i], x_curl); Hx[i] = mul(inv_mu_x[i], x_curl);
{%- else %} {%- else -%}
Hx[i] = x_curl; Hx[i] = x_curl;
{%- endif %} {%- endif %}
} }
@ -59,9 +59,9 @@ if (pmc_y[i] != 0) {
ctype Dzx = mul(sub(Ez[i + px], Ez[i]), inv_dex[x]); ctype Dzx = mul(sub(Ez[i + px], Ez[i]), inv_dex[x]);
ctype y_curl = sub(Dxz, Dzx); ctype y_curl = sub(Dxz, Dzx);
{%- if mu %} {%- if mu -%}
Hy[i] = mul(inv_mu_y[i], y_curl); Hy[i] = mul(inv_mu_y[i], y_curl);
{%- else %} {%- else -%}
Hy[i] = y_curl; Hy[i] = y_curl;
{%- endif %} {%- endif %}
} }
@ -76,9 +76,9 @@ if (pmc_z[i] != 0) {
ctype Dxy = mul(sub(Ex[i + py], Ex[i]), inv_dey[y]); ctype Dxy = mul(sub(Ex[i + py], Ex[i]), inv_dey[y]);
ctype z_curl = sub(Dyx, Dxy); ctype z_curl = sub(Dyx, Dxy);
{%- if mu %} {%- if mu -%}
Hz[i] = mul(inv_mu_z[i], z_curl); Hz[i] = mul(inv_mu_z[i], z_curl);
{%- else %} {%- else -%}
Hz[i] = z_curl; Hz[i] = z_curl;
{%- endif %} {%- endif %}
} }

@ -8,7 +8,6 @@ a matrix).
from typing import List from typing import List
import time import time
import logging
import numpy import numpy
from numpy.linalg import norm from numpy.linalg import norm
@ -19,11 +18,8 @@ import fdfd_tools.operators
from . import ops from . import ops
__author__ = 'Jan Petykiewicz' __author__ = 'Jan Petykiewicz'
logger = logging.getLogger(__name__)
def cg_solver(omega: complex, def cg_solver(omega: complex,
dxes: List[List[numpy.ndarray]], dxes: List[List[numpy.ndarray]],
@ -36,6 +32,7 @@ def cg_solver(omega: complex,
max_iters: int = 40000, max_iters: int = 40000,
err_threshold: float = 1e-6, err_threshold: float = 1e-6,
context: pyopencl.Context = None, context: pyopencl.Context = None,
verbose: bool = False,
) -> numpy.ndarray: ) -> numpy.ndarray:
""" """
OpenCL FDFD solver using the iterative conjugate gradient (cg) method OpenCL FDFD solver using the iterative conjugate gradient (cg) method
@ -60,6 +57,7 @@ def cg_solver(omega: complex,
:param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success. :param err_threshold: If (r @ r.conj()) / norm(1j * omega * J) < err_threshold, success.
Default 1e-6. Default 1e-6.
:param context: PyOpenCL context to run in. If not given, construct a new context. :param context: PyOpenCL context to run in. If not given, construct a new context.
:param verbose: If True, print progress to stdout. Default False.
:return: E-field which solves the system. Returned even if we did not converge. :return: E-field which solves the system. Returned even if we did not converge.
""" """
@ -173,13 +171,12 @@ def cg_solver(omega: complex,
_, err2 = rhoerr_step(r, []) _, err2 = rhoerr_step(r, [])
b_norm = numpy.sqrt(err2) b_norm = numpy.sqrt(err2)
logging.debug('b_norm check: {}'.format(b_norm)) print('b_norm check: ', b_norm)
success = False success = False
for k in range(max_iters): for k in range(max_iters):
do_print = (k % 100 == 0) if verbose:
if do_print: print('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha), end=' ')
logger.debug('[{:06d}] rho {:.4} alpha {:4.4}'.format(k, rho, alpha))
rho_prev = rho rho_prev = rho
e = xr_step(x, p, r, v, alpha, []) e = xr_step(x, p, r, v, alpha, [])
@ -187,8 +184,8 @@ def cg_solver(omega: complex,
errs += [numpy.sqrt(err2) / b_norm] errs += [numpy.sqrt(err2) / b_norm]
if do_print: if verbose:
logger.debug('err {}'.format(errs[-1])) print('err', errs[-1])
if errs[-1] < err_threshold: if errs[-1] < err_threshold:
success = True success = True
@ -199,7 +196,7 @@ def cg_solver(omega: complex,
alpha = rho / dot(p, v, e) alpha = rho / dot(p, v, e)
if k % 1000 == 0: if k % 1000 == 0:
logger.info('iteration {}'.format(k)) print(k)
''' '''
Done solving Done solving
@ -213,18 +210,18 @@ def cg_solver(omega: complex,
x = (Pr * x).get() x = (Pr * x).get()
if success: if success:
logger.info('Solve success') print('Success', end='')
else: else:
logger.warning('Solve failure') print('Failure', end=', ')
logger.info('{} iterations in {} sec: {} iterations/sec \ print(', {} iterations in {} sec: {} iterations/sec \
'.format(k, time_elapsed, k / time_elapsed)) '.format(k, time_elapsed, k / time_elapsed))
logger.debug('final error {}'.format(errs[-1])) print('final error', errs[-1])
logger.debug('overhead {} sec'.format(start_time2 - start_time)) print('overhead {} sec'.format(start_time2 - start_time))
A0 = fdfd_tools.operators.e_full(omega, dxes, epsilon, mu).tocsr() A0 = fdfd_tools.operators.e_full(omega, dxes, epsilon, mu).tocsr()
if adjoint: if adjoint:
# Remember we conjugated all the contents of A earlier # Remember we conjugated all the contents of A earlier
A0 = A0.T A0 = A0.T
logger.info('Post-everything residual: {}'.format(norm(A0 @ x - b) / norm(b))) print('Post-everything residual:', norm(A0 @ x - b) / norm(b))
return x return x

@ -8,7 +8,6 @@ See kernels/ for any of the .cl files loaded in this file.
""" """
from typing import List, Callable from typing import List, Callable
import logging
import numpy import numpy
import jinja2 import jinja2
@ -19,8 +18,6 @@ from pyopencl.elementwise import ElementwiseKernel
from pyopencl.reduction import ReductionKernel from pyopencl.reduction import ReductionKernel
logger = logging.getLogger(__name__)
# Create jinja2 env on module load # Create jinja2 env on module load
jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
@ -148,11 +145,6 @@ def create_a(context: pyopencl.Context,
e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2]) e2 = H2E_kernel(E, H, oeps, Pl, pec, *idxes[1], wait_for=[e2])
return [e2] return [e2]
logger.debug('Preamble: \n{}'.format(preamble))
logger.debug('p2e: \n{}'.format(p2e_source))
logger.debug('e2h: \n{}'.format(e2h_source))
logger.debug('h2e: \n{}'.format(h2e_source))
return spmv return spmv

@ -1,19 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python
from setuptools import setup, find_packages from setuptools import setup, find_packages
import opencl_fdfd
with open('README.md', 'r') as f:
long_description = f.read()
setup(name='opencl_fdfd', setup(name='opencl_fdfd',
version=opencl_fdfd.version, version='0.3',
description='OpenCL FDFD solver', description='OpenCL FDFD solver',
long_description=long_description,
long_description_content_type='text/markdown',
author='Jan Petykiewicz', author='Jan Petykiewicz',
author_email='anewusername@gmail.com', author_email='anewusername@gmail.com',
url='https://mpxd.net/code/jan/opencl_fdfd', url='https://mpxd.net/gogs/jan/opencl_fdfd',
packages=find_packages(), packages=find_packages(),
package_data={ package_data={
'opencl_fdfd': ['kernels/*'] 'opencl_fdfd': ['kernels/*']

Loading…
Cancel
Save