Compare commits

...

3 Commits

Author SHA1 Message Date
Jan Petykiewicz cf0db63a3f Revert "Add restrict keyword to pointers (not sharing the same memory for multiple fields)"
This reverts commit 60b70bb332.

It appears to have minimal performance impact, and fails to compile on
nvidia cards.
5 years ago
Jan Petykiewicz 89d99187b6 Remove unused code 5 years ago
Jan Petykiewicz 1627d0da05 Consolidate poynting code into update_h 5 years ago

@ -6,13 +6,14 @@
* common_header: Rendered contents of common.cl
* pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
* axes, polarities, and thicknesses.
* do_poynting: Whether to precalculate poynting vector components (boolean)
* do_poynting: Whether to calculate poynting vector (boolean)
* do_poynting_halves: Whether to calculate half-step poynting vectors (boolean)
* uniform_dx: If grid is uniform, uniform_dx should be the grid spacing.
* Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as
* OpenCL args.
*
* OpenCL args:
* E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
* E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [S], [S0, S1]
*/
{{common_header}}
@ -52,24 +53,7 @@ if ({{r}} == s{{r}} - 1) {
dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]);
dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]);
}
{% endfor -%}
{%- if do_poynting %}
/*
* Precalculate averaged E
*/
ftype aExy = Ex[i + py] + Ex[i];
ftype aExz = Ex[i + pz] + Ex[i];
ftype aEyx = Ey[i + px] + Ey[i];
ftype aEyz = Ey[i + pz] + Ey[i];
ftype aEzx = Ez[i + px] + Ez[i];
ftype aEzy = Ez[i + py] + Ez[i];
{%- endif %}
{%- endfor %}
@ -118,7 +102,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) {
/*
* Update H
*/
{% if do_poynting -%}
{% if do_poynting or do_poynting_halves -%}
// Save old H for averaging
ftype Hx_old = Hx[i];
ftype Hy_old = Hy[i];
@ -131,25 +115,40 @@ Hy[i] -= dt * (dExz - dEzx - pHyi);
Hz[i] -= dt * (dEyx - dExy - pHzi);
{% if do_poynting -%}
/*
* Calculate unscaled S components at ??? locations
* //TODO: document S locations and types
*/
__global ftype *Sx = S + XX;
__global ftype *Sy = S + YY;
__global ftype *Sz = S + ZZ;
// Average H across timesteps
ftype aHxt = Hx[i] + Hx_old;
ftype aHyt = Hy[i] + Hy_old;
ftype aHzt = Hz[i] + Hz_old;
/*
* Calculate unscaled S components at H locations
*/
__global ftype *oSxy = oS + 0 * field_size;
__global ftype *oSyz = oS + 1 * field_size;
__global ftype *oSzx = oS + 2 * field_size;
__global ftype *oSxz = oS + 3 * field_size;
__global ftype *oSyx = oS + 4 * field_size;
__global ftype *oSzy = oS + 5 * field_size;
oSxy[i] = aEyx * aHzt;
oSxz[i] = -aEzx * aHyt;
oSyz[i] = aEzy * aHxt;
oSyx[i] = -aExy * aHzt;
oSzx[i] = aExz * aHyt;
oSzy[i] = -aEyz * aHxt;
Sx[i] = Ey[i + px] * aHzt - Ez[i + px] * aHyt;
Sy[i] = Ez[i + py] * aHxt - Ex[i + py] * aHzt;
Sz[i] = Ex[i + pz] * aHyt - Ey[i + pz] * aHxt;
{%- endif -%}
{% if do_poynting_halves -%}
/*
* Calculate unscaled S components at ??? locations
* //TODO: document S locations and types
*/
__global ftype *Sx0 = S0 + XX;
__global ftype *Sy0 = S0 + YY;
__global ftype *Sz0 = S0 + ZZ;
__global ftype *Sx1 = S1 + XX;
__global ftype *Sy1 = S1 + YY;
__global ftype *Sz1 = S1 + ZZ;
Sx0[i] = Ey[i + px] * Hz_old - Ez[i + px] * Hy_old;
Sy0[i] = Ez[i + py] * Hx_old - Ex[i + py] * Hz_old;
Sz0[i] = Ex[i + pz] * Hy_old - Ey[i + pz] * Hx_old;
Sx1[i] = Ey[i + px] * Hz[i] - Ez[i + px] * Hy[i];
Sy1[i] = Ez[i + py] * Hx[i] - Ex[i + py] * Hz[i];
Sz1[i] = Ex[i + pz] * Hy[i] - Ey[i + pz] * Hx[i];
{%- endif -%}

@ -1,36 +0,0 @@
/*
* Update E-field, including any PMLs.
*
* Template parameters:
* common_header: Rendered contents of common.cl
* pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
* pml_thickness: Number of cells (integer)
*
* OpenCL args:
* E, H, dt, S, oS
*/
{{common_header}}
//////////////////////////////////////////////////////////////////////
/*
* Calculate S from oS (pre-calculated components)
*/
__global ftype *Sx = S + XX;
__global ftype *Sy = S + YY;
__global ftype *Sz = S + ZZ;
// Use unscaled S components from H locations
__global ftype *oSxy = oS + 0 * field_size;
__global ftype *oSyz = oS + 1 * field_size;
__global ftype *oSzx = oS + 2 * field_size;
__global ftype *oSxz = oS + 3 * field_size;
__global ftype *oSyx = oS + 4 * field_size;
__global ftype *oSzy = oS + 5 * field_size;
ftype s_factor = dt * 0.125;
Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;

@ -26,7 +26,7 @@ class Simulation(object):
"""
Constructs and holds the basic FDTD operations and related fields
After constructing this object, call the (update_E, update_H, update_S) members
After constructing this object, call the (update_E, update_H) members
to perform FDTD updates on the stored (E, H, S) fields:
pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np']
@ -43,8 +43,6 @@ class Simulation(object):
# Perturb the field (i.e., add a soft current source)
sim.E[ind] += numpy.sin(omega * t * sim.dt)
event = sim.update_H([])
if sim.update_S:
event = sim.update_S([event])
event.wait()
with lzma.open('saved_simulation', 'wb') as f:
@ -112,6 +110,7 @@ class Simulation(object):
:param float_type: numpy.float32 or numpy.float64. Default numpy.float32.
:param do_poynting: If true, enables calculation of the poynting vector, S.
Poynting vector calculation adds the following computational burdens:
****INACCURATE, TODO FIXME*****
* During update_H, ~6 extra additions/cell are performed in order to spatially
average E and temporally average H. These quantities are multiplied
(6 multiplications/cell) and then stored (6 writes/cell, cache-friendly).
@ -170,7 +169,7 @@ class Simulation(object):
ctype = type_to_C(self.arg_type)
def ptr(arg: str) -> str:
return ctype + ' * restrict ' + arg
return ctype + ' *' + arg
base_fields = OrderedDict()
base_fields[ptr('E')] = self.E
@ -226,12 +225,7 @@ class Simulation(object):
S_fields = OrderedDict()
if do_poynting:
S_source = jinja_env.get_template('update_s.cl').render(**jinja_args)
self.sources['S'] = S_source
self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type)
self.S = pyopencl.array.zeros_like(self.E)
S_fields[ptr('oS')] = self.oS
S_fields[ptr('S')] = self.S
J_fields = OrderedDict()
@ -257,8 +251,6 @@ class Simulation(object):
'''
self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields))
self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields))
if do_poynting:
self.update_S = self._create_operation(S_source, (base_fields, S_fields))
if bloch_boundaries:
self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields))
self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields))

Loading…
Cancel
Save