Rewrite, with the following features:
- Move to jinja2 templates for the opencl code
- Combine PML code into the E, H updates for speed
- Add Poynting vector calculation code, including precalculation during H update
- Use arrays for PML parameters (p0, p1)
- Switch to linearized, C-ordered fields (~50% performance boost??)
- Added jinja2 and fdfd_tools dependencies
This commit is contained in:
parent
cd72219d0b
commit
d34c478f1d
9 changed files with 511 additions and 407 deletions
84
fdtd/kernels/common.cl
Normal file
84
fdtd/kernels/common.cl
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
{#
|
||||
/* Common code for E, H updates
|
||||
*
|
||||
* Template parameters:
|
||||
* ftype type name (e.g. float or double)
|
||||
* shape list of 3 ints specifying shape of fields
|
||||
*/
|
||||
#}
|
||||
|
||||
typedef {{ftype}} ftype;
|
||||
|
||||
|
||||
/*
|
||||
* Field size info
|
||||
*/
|
||||
const size_t sx = {{shape[0]}};
|
||||
const size_t sy = {{shape[1]}};
|
||||
const size_t sz = {{shape[2]}};
|
||||
const size_t field_size = sx * sy * sz;
|
||||
|
||||
//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
|
||||
// i is outside the bounds of Ex[].
|
||||
if (i >= field_size) {
|
||||
PYOPENCL_ELWISE_CONTINUE;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Array indexing
|
||||
*/
|
||||
// Given a linear index i and shape (sx, sy, sz), defines x, y, and z
|
||||
// as the 3D indices of the current element (i).
|
||||
// (i.e., converts linear index [i] to field indices (x, y, z))
|
||||
const size_t x = i / (sz * sy);
|
||||
const size_t y = (i - x * sz * sy) / sz;
|
||||
const size_t z = (i - y * sz - x * sz * sy);
|
||||
|
||||
// Calculate linear index offsets corresponding to offsets in 3D
|
||||
// (i.e., if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z))
|
||||
const size_t dix = sz * sy;
|
||||
const size_t diy = sz;
|
||||
const size_t diz = 1;
|
||||
|
||||
|
||||
/*
|
||||
* Pointer math
|
||||
*/
|
||||
//Pointer offsets into the components of a linearized vector-field
|
||||
// (eg. Hx = H + XX, where H and Hx are pointers)
|
||||
const size_t XX = 0;
|
||||
const size_t YY = field_size;
|
||||
const size_t ZZ = field_size * 2;
|
||||
|
||||
//Define pointers to vector components of each field (eg. Hx = H + XX)
|
||||
__global ftype *Ex = E + XX;
|
||||
__global ftype *Ey = E + YY;
|
||||
__global ftype *Ez = E + ZZ;
|
||||
|
||||
__global ftype *Hx = H + XX;
|
||||
__global ftype *Hy = H + YY;
|
||||
__global ftype *Hz = H + ZZ;
|
||||
|
||||
|
||||
/*
|
||||
* Implement periodic boundary conditions
|
||||
*
|
||||
* mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction.
|
||||
* In the event that we start at x == 0, we actually want to wrap around and grab the cell
|
||||
* x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix .
|
||||
*
|
||||
* px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction.
|
||||
* In the event that we start at x == (sx - 1), we actually want to wrap around and grab
|
||||
* the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
|
||||
*/
|
||||
{% for r in 'xyz' %}
// Linear-index offsets to the neighboring cells along {{r}}.
// Interior cells simply step by -di{{r}} / +di{{r}}.
int m{{r}} = -di{{r}};
int p{{r}} = +di{{r}};

// Offset from the first cell along {{r}} to the last one (used for wrap-around).
int wrap_{{r}} = (s{{r}} - 1) * di{{r}};

// The two edge checks are deliberately independent (not if / else-if):
// when s{{r}} == 1 the cell is both the first and the last along {{r}}, so
// both offsets must wrap (to 0). Otherwise p{{r}} would stay +di{{r}} and
// index a cell that is not a {{r}}-neighbor (or lies outside the array).
if ( {{r}} == 0 ) {
    m{{r}} = wrap_{{r}};
}
if ( {{r}} == s{{r}} - 1 ) {
    p{{r}} = -wrap_{{r}};
}
{% endfor %}
|
||||
78
fdtd/kernels/update_e.cl
Normal file
78
fdtd/kernels/update_e.cl
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Update E-field, including any PMLs.
|
||||
*
|
||||
* Template parameters:
|
||||
* common_header: Rendered contents of common.cl
|
||||
* pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
|
||||
* pml_thickness: Number of cells (integer)
|
||||
*
|
||||
* OpenCL args:
|
||||
* E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
|
||||
*/
|
||||
|
||||
{{common_header}}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__global ftype *epsx = eps + XX;
|
||||
__global ftype *epsy = eps + YY;
|
||||
__global ftype *epsz = eps + ZZ;
|
||||
|
||||
{% if pmls -%}
|
||||
const int pml_thickness = {{pml_thickness}};
|
||||
{%- endif %}
|
||||
|
||||
/*
|
||||
* Precalculate derivatives
|
||||
*/
|
||||
ftype dHxy = Hx[i] - Hx[i + my];
|
||||
ftype dHxz = Hx[i] - Hx[i + mz];
|
||||
|
||||
ftype dHyx = Hy[i] - Hy[i + mx];
|
||||
ftype dHyz = Hy[i] - Hy[i + mz];
|
||||
|
||||
ftype dHzx = Hz[i] - Hz[i + mx];
|
||||
ftype dHzy = Hz[i] - Hz[i + my];
|
||||
|
||||
/*
|
||||
* PML Update
|
||||
*/
|
||||
// PML effects on E
|
||||
ftype pExi = 0;
|
||||
ftype pEyi = 0;
|
||||
ftype pEzi = 0;
|
||||
|
||||
{% for r, p in pmls -%}
|
||||
{%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
|
||||
{%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
|
||||
{%- if r != 'y' -%}
|
||||
{%- set se, sh = '-', '+' -%}
|
||||
{%- else -%}
|
||||
{%- set se, sh = '+', '-' -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- if p == 'n' %}
|
||||
|
||||
if ( {{r}} < pml_thickness ) {
|
||||
const size_t ir = {{r}}; // index into pml parameters
|
||||
|
||||
{%- elif p == 'p' %}
|
||||
|
||||
if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
|
||||
const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters
|
||||
|
||||
{%- endif %}
|
||||
const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi
|
||||
{{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
|
||||
{{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
|
||||
pE{{u}}i {{se}}= {{psi ~ u}}[ip];
|
||||
pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
|
||||
}
|
||||
{%- endfor %}
|
||||
|
||||
/*
|
||||
* Update E
|
||||
*/
|
||||
Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
|
||||
Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
|
||||
Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);
|
||||
125
fdtd/kernels/update_h.cl
Normal file
125
fdtd/kernels/update_h.cl
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
* Update H-field, including any PMLs.
|
||||
* Also precalculate values for poynting vector if necessary.
|
||||
*
|
||||
* Template parameters:
|
||||
* common_header: Rendered contents of common.cl
|
||||
* pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
|
||||
* pml_thickness: Number of cells (integer)
|
||||
* do_poynting: Whether to precalculate poynting vector components (boolean)
|
||||
*
|
||||
* OpenCL args:
|
||||
* E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
|
||||
*/
|
||||
|
||||
{{common_header}}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
{% if pmls -%}
|
||||
const int pml_thickness = {{pml_thickness}};
|
||||
{%- endif %}
|
||||
|
||||
/*
|
||||
* Precalculate derivatives
|
||||
*/
|
||||
ftype dExy = Ex[i + py] - Ex[i];
|
||||
ftype dExz = Ex[i + pz] - Ex[i];
|
||||
|
||||
ftype dEyx = Ey[i + px] - Ey[i];
|
||||
ftype dEyz = Ey[i + pz] - Ey[i];
|
||||
|
||||
ftype dEzx = Ez[i + px] - Ez[i];
|
||||
ftype dEzy = Ez[i + py] - Ez[i];
|
||||
|
||||
{%- if do_poynting %}
|
||||
|
||||
|
||||
/*
|
||||
* Precalculate averaged E
|
||||
*/
|
||||
ftype aExy = Ex[i + py] + Ex[i];
|
||||
ftype aExz = Ex[i + pz] + Ex[i];
|
||||
|
||||
ftype aEyx = Ey[i + px] + Ey[i];
|
||||
ftype aEyz = Ey[i + pz] + Ey[i];
|
||||
|
||||
ftype aEzx = Ez[i + px] + Ez[i];
|
||||
ftype aEzy = Ez[i + py] + Ez[i];
|
||||
{%- endif %}
|
||||
|
||||
|
||||
/*
|
||||
* PML Update
|
||||
*/
|
||||
// PML contributions to H
|
||||
ftype pHxi = 0;
|
||||
ftype pHyi = 0;
|
||||
ftype pHzi = 0;
|
||||
|
||||
{%- for r, p in pmls -%}
|
||||
{%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
|
||||
{%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
|
||||
{%- if r != 'y' -%}
|
||||
{%- set se, sh = '-', '+' -%}
|
||||
{%- else -%}
|
||||
{%- set se, sh = '+', '-' -%}
|
||||
{%- endif -%}
|
||||
|
||||
{%- if p == 'n' %}
|
||||
|
||||
if ( {{r}} < pml_thickness ) {
|
||||
const size_t ir = {{r}}; // index into pml parameters
|
||||
|
||||
{%- elif p == 'p' %}
|
||||
|
||||
if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
|
||||
const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters
|
||||
|
||||
{%- endif %}
|
||||
const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi
|
||||
{{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
|
||||
{{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
|
||||
pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
|
||||
pH{{v}}i {{se}}= {{psi ~ v}}[ip];
|
||||
}
|
||||
{%- endfor %}
|
||||
|
||||
/*
|
||||
* Update H
|
||||
*/
|
||||
{% if do_poynting -%}
|
||||
// Save old H for averaging
|
||||
ftype Hx_old = Hx[i];
|
||||
ftype Hy_old = Hy[i];
|
||||
ftype Hz_old = Hz[i];
|
||||
{%- endif %}
|
||||
|
||||
// H update equations
|
||||
Hx[i] -= dt * (dEzy - dEyz + pHxi);
|
||||
Hy[i] -= dt * (dExz - dEzx + pHyi);
|
||||
Hz[i] -= dt * (dEyx - dExy + pHzi);
|
||||
|
||||
{% if do_poynting -%}
|
||||
// Average H across timesteps
|
||||
ftype aHxt = Hx[i] + Hx_old;
|
||||
ftype aHyt = Hy[i] + Hy_old;
|
||||
ftype aHzt = Hz[i] + Hz_old;
|
||||
|
||||
/*
|
||||
* Calculate unscaled S components at H locations
|
||||
*/
|
||||
__global ftype *oSxy = oS + 0 * field_size;
|
||||
__global ftype *oSyz = oS + 1 * field_size;
|
||||
__global ftype *oSzx = oS + 2 * field_size;
|
||||
__global ftype *oSxz = oS + 3 * field_size;
|
||||
__global ftype *oSyx = oS + 4 * field_size;
|
||||
__global ftype *oSzy = oS + 5 * field_size;
|
||||
|
||||
oSxy[i] = aEyx * aHzt;
|
||||
oSxz[i] = -aEzx * aHyt;
|
||||
oSyz[i] = aEzy * aHxt;
|
||||
oSyx[i] = -aExy * aHzt;
|
||||
oSzx[i] = aExz * aHyt;
|
||||
oSzy[i] = -aEyz * aHxt;
|
||||
{%- endif -%}
|
||||
36
fdtd/kernels/update_s.cl
Normal file
36
fdtd/kernels/update_s.cl
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Calculate the Poynting vector S from the precalculated components (oS).
|
||||
*
|
||||
* Template parameters:
|
||||
* common_header: Rendered contents of common.cl
|
||||
* pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities (not referenced by this kernel)
|
||||
* pml_thickness: Number of cells (integer) (not referenced by this kernel)
|
||||
*
|
||||
* OpenCL args:
|
||||
* E, H, dt, S, oS
|
||||
*/
|
||||
|
||||
{{common_header}}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
/*
|
||||
* Calculate S from oS (pre-calculated components)
|
||||
*/
|
||||
__global ftype *Sx = S + XX;
|
||||
__global ftype *Sy = S + YY;
|
||||
__global ftype *Sz = S + ZZ;
|
||||
|
||||
// Use unscaled S components from H locations
|
||||
__global ftype *oSxy = oS + 0 * field_size;
|
||||
__global ftype *oSyz = oS + 1 * field_size;
|
||||
__global ftype *oSzx = oS + 2 * field_size;
|
||||
__global ftype *oSxz = oS + 3 * field_size;
|
||||
__global ftype *oSyx = oS + 4 * field_size;
|
||||
__global ftype *oSzy = oS + 5 * field_size;
|
||||
|
||||
ftype s_factor = dt * 0.125;
|
||||
Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
|
||||
Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
|
||||
Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;
|
||||
Loading…
Add table
Add a link
Reference in a new issue