Compare commits

...

3 Commits

Author SHA1 Message Date
cf0db63a3f Revert "Add restrict keyword to pointers (not sharing the same memory for multiple fields)"
This reverts commit 60b70bb332.

It appears to have minimal performance impact, and fails to compile on
nvidia cards.
2019-07-17 00:53:55 -07:00
89d99187b6 Remove unused code 2019-07-17 00:52:31 -07:00
1627d0da05 Consolidate poynting code into update_h 2019-07-15 00:11:09 -07:00
3 changed files with 39 additions and 84 deletions

View File

@ -6,13 +6,14 @@
* common_header: Rendered contents of common.cl
* pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
* axes, polarities, and thicknesses.
* do_poynting: Whether to precalculate poynting vector components (boolean)
* do_poynting: Whether to calculate poynting vector (boolean)
* do_poynting_halves: Whether to calculate half-step poynting vectors (boolean)
* uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
* Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
* OpenCL args.
*
* OpenCL args:
* E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
* E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [S], [S0, S1]
*/
{{common_header}}
@ -52,24 +53,7 @@ if ({{r}} == s{{r}} - 1) {
dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
}
{% endfor -%}
{%- if do_poynting %}
/*
* Precalculate averaged E
*/
ftype aExy = Ex[i + py] + Ex[i];
ftype aExz = Ex[i + pz] + Ex[i];
ftype aEyx = Ey[i + px] + Ey[i];
ftype aEyz = Ey[i + pz] + Ey[i];
ftype aEzx = Ez[i + px] + Ez[i];
ftype aEzy = Ez[i + py] + Ez[i];
{%- endif %}
{%- endfor %}
@ -118,7 +102,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
/*
* Update H
*/
{% if do_poynting -%}
{% if do_poynting or do_poynting_halves -%}
// Save old H for averaging
ftype Hx_old = Hx[i];
ftype Hy_old = Hy[i];
@ -131,25 +115,40 @@ Hy[i] -= dt * (dExz - dEzx - pHyi);
Hz[i] -= dt * (dEyx - dExy - pHzi);
{% if do_poynting -%}
/*
* Calculate unscaled S components at ??? locations
* //TODO: document S locations and types
*/
__global ftype *Sx = S + XX;
__global ftype *Sy = S + YY;
__global ftype *Sz = S + ZZ;
// Average H across timesteps
ftype aHxt = Hx[i] + Hx_old;
ftype aHyt = Hy[i] + Hy_old;
ftype aHzt = Hz[i] + Hz_old;
/*
* Calculate unscaled S components at H locations
*/
__global ftype *oSxy = oS + 0 * field_size;
__global ftype *oSyz = oS + 1 * field_size;
__global ftype *oSzx = oS + 2 * field_size;
__global ftype *oSxz = oS + 3 * field_size;
__global ftype *oSyx = oS + 4 * field_size;
__global ftype *oSzy = oS + 5 * field_size;
oSxy[i] = aEyx * aHzt;
oSxz[i] = -aEzx * aHyt;
oSyz[i] = aEzy * aHxt;
oSyx[i] = -aExy * aHzt;
oSzx[i] = aExz * aHyt;
oSzy[i] = -aEyz * aHxt;
Sx[i] = Ey[i + px] * aHzt - Ez[i + px] * aHyt;
Sy[i] = Ez[i + py] * aHxt - Ex[i + py] * aHzt;
Sz[i] = Ex[i + pz] * aHyt - Ey[i + pz] * aHxt;
{%- endif -%}
{% if do_poynting_halves -%}
/*
* Calculate unscaled S components at ??? locations
* //TODO: document S locations and types
*/
__global ftype *Sx0 = S0 + XX;
__global ftype *Sy0 = S0 + YY;
__global ftype *Sz0 = S0 + ZZ;
__global ftype *Sx1 = S1 + XX;
__global ftype *Sy1 = S1 + YY;
__global ftype *Sz1 = S1 + ZZ;
Sx0[i] = Ey[i + px] * Hz_old - Ez[i + px] * Hy_old;
Sy0[i] = Ez[i + py] * Hx_old - Ex[i + py] * Hz_old;
Sz0[i] = Ex[i + pz] * Hy_old - Ey[i + pz] * Hx_old;
Sx1[i] = Ey[i + px] * Hz[i] - Ez[i + px] * Hy[i];
Sy1[i] = Ez[i + py] * Hx[i] - Ex[i + py] * Hz[i];
Sz1[i] = Ex[i + pz] * Hy[i] - Ey[i + pz] * Hx[i];
{%- endif -%}

View File

@ -1,36 +0,0 @@
/*
* Update E-field, including any PMLs.
*
* Template parameters:
* common_header: Rendered contents of common.cl
* pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
* pml_thickness: Number of cells (integer)
*
* OpenCL args:
* E, H, dt, S, oS
*/
{{common_header}}
//////////////////////////////////////////////////////////////////////
/*
* Calculate S from oS (pre-calculated components)
*/
__global ftype *Sx = S + XX;
__global ftype *Sy = S + YY;
__global ftype *Sz = S + ZZ;
// Use unscaled S components from H locations
__global ftype *oSxy = oS + 0 * field_size;
__global ftype *oSyz = oS + 1 * field_size;
__global ftype *oSzx = oS + 2 * field_size;
__global ftype *oSxz = oS + 3 * field_size;
__global ftype *oSyx = oS + 4 * field_size;
__global ftype *oSzy = oS + 5 * field_size;
ftype s_factor = dt * 0.125;
Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;

View File

@ -26,7 +26,7 @@ class Simulation(object):
"""
Constructs and holds the basic FDTD operations and related fields
After constructing this object, call the (update_E, update_H, update_S) members
After constructing this object, call the (update_E, update_H) members
to perform FDTD updates on the stored (E, H, S) fields:
pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
@ -43,8 +43,6 @@ class Simulation(object):
# Perturb the field (i.e., add a soft current source)
sim.E[ind] += numpy.sin(omega * t * sim.dt)
event = sim.update_H([])
if sim.update_S:
event = sim.update_S([event])
event.wait()
with lzma.open('saved_simulation', 'wb') as f:
@ -112,6 +110,7 @@ class Simulation(object):
:param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
:param do_poynting: If true, enables calculation of the poynting vector, S.
Poynting vector calculation adds the following computational burdens:
****INACCURATE, TODO FIXME*****
* During update_H, ~6 extra additions/cell are performed in order to spatially
average E and temporally average H. These quantities are multiplied
(6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
@ -170,7 +169,7 @@ class Simulation(object):
ctype = type_to_C(self.arg_type)
def ptr(arg: str) -> str:
return ctype + ' * restrict ' + arg
return ctype + ' *' + arg
base_fields = OrderedDict()
base_fields[ptr('E')] = self.E
@ -226,12 +225,7 @@ class Simulation(object):
S_fields = OrderedDict()
if do_poynting:
S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
self.sources['S'] = S_source
self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
self.S = pyopencl.array.zeros_like(self.E)
S_fields[ptr('oS')] = self.oS
S_fields[ptr('S')] = self.S
J_fields = OrderedDict()
@ -257,8 +251,6 @@ class Simulation(object):
'''
self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
if do_poynting:
self.update_S = self._create_operation(S_source, (base_fields, S_fields))
if bloch_boundaries:
self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))