You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
opencl_fdtd/fdtd/kernels/update_e_full.cl

112 lines
3.3 KiB
Common Lisp

//CL//
/*
* Update E-field, including any PMLs.
*
* Template parameters:
* common_header: Rendered contents of common.cl
* pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
* pml_thickness: Number of cells (integer)
*
* OpenCL args:
* E, H, dt, eps, n, [p{01}e{np}, Psi_{xyz}{np}_E]
*/
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable

/*
 * Field accessors for 3D images.
 *
 * Callers pass indices as (x, y, z); the image functions receive them in
 * reversed order, as the int4 coordinate (z, y, x, 0).
 *
 * read_field  - returns the float4 texel of `img`, read through the
 *               file-scope `sampler`.
 * write_field - stores the float4 `val` into `img` at the same coordinate.
 */
#define read_field(img, ix, iy, iz) \
    read_imagef(img, sampler, (int4)((iz), (iy), (ix), 0))
#define write_field(img, ix, iy, iz, val) \
    write_imagef(img, (int4)((iz), (iy), (ix), 0), (val))

/*
 * Sampler used by every read_field() call: unnormalized coordinates,
 * CLK_ADDRESS_NONE addressing, nearest filtering.
 */
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE
                        | CLK_ADDRESS_NONE
                        | CLK_FILTER_NEAREST;
/*
 * update_e: update the E-field from differences of H, adding PML
 * contributions when the template is rendered with a non-empty `pmls`.
 *
 * Arguments:
 *   E    - global float buffer; entry i is accessed as a float4 through
 *          vload4(i, E) / vstore4(E__, i, E). Only .x/.y/.z are modified;
 *          .w is stored back unchanged.
 *   H    - read-only 3D image, sampled through read_field().
 *   dt   - scalar; each component update is scaled by dt / eps.
 *   eps  - read-only 3D image; its .x/.y/.z divide dt for the matching
 *          E component.
 *   n    - exclusive upper bound on the entry index i.
 *
 * NOTE(review): the PML section references p0e<p>, p1e<p> and Psi_<r><p>_E<u|v>
 * buffers that do not appear in this parameter list. Unless the rendered
 * common header declares them, rendering with a non-empty `pmls` would
 * presumably fail to compile -- confirm against the host code.
 */
__kernel void update_e(
__global float* E,
__read_only image3d_t H,
const float dt,
__read_only image3d_t eps,
const size_t n
) {
size_t lid = get_local_id(0);
size_t gsize = get_global_size(0);
size_t work_group_start = get_local_size(0) * get_group_id(0);
size_t i;
/*
 * Each work-item starts at (work-group start + local id) and advances by
 * the global size, so a single work-item may process several entries.
 */
for (i = work_group_start + lid; i < n; i += gsize) {
/*
 * The rendered contents of common.cl are inserted here. The names x, y, z,
 * mx, my, mz and sx, sy, sz used below are not declared in this kernel, so
 * they presumably come from that header -- TODO confirm.
 */
{{common_header | indent(8, False)}}
////////////////////////////////////////////////////////////////////////////
/*
 * Sample eps and H at (x, y, z), plus H at the three positions offset by
 * mx, my and mz along x, y and z respectively.
 * NOTE(review): the sampler is declared with CLK_ADDRESS_NONE, so the
 * offset coordinates are presumably guaranteed in range by how mx/my/mz
 * are defined -- confirm.
 */
float4 eps__ = read_field(eps, x, y, z);
float4 H__ = read_field(H, x, y, z);
float4 Hmx = read_field(H, x + mx, y, z);
float4 Hmy = read_field(H, x, y + my, z);
float4 Hmz = read_field(H, x, y, z + mz);
/* pml_thickness is only emitted when the `pmls` list is non-empty. */
{% if pmls -%}
const int pml_thickness = {{pml_thickness}};
{%- endif %}
/*
* Precalculate derivatives
*
* dHab = (component a of H here) - (component a of H at the position
* offset along b), e.g. dHxy = H__.x - Hmy.x.
*/
float dHxy = H__.x - Hmy.x;
float dHxz = H__.x - Hmz.x;
float dHyx = H__.y - Hmx.y;
float dHyz = H__.y - Hmz.y;
float dHzx = H__.z - Hmx.z;
float dHzy = H__.z - Hmy.z;
/*
* PML Update
*/
// PML effects on E
/* Accumulators for the PML terms; they stay 0 if no PML branch executes. */
float pExi = 0;
float pEyi = 0;
float pEzi = 0;
/*
 * One block is generated per (r, p) entry of `pmls` (axis, polarity), with
 * u, v the two axes left after removing r from x, y, z (in that order):
 *   - polarity 'n': active where r < pml_thickness, with ir = r;
 *   - polarity 'p': active where s<r> - pml_thickness <= r < s<r>,
 *     with ir = (s<r> - 1) - r;
 *   - ip = v + u * s<v> + ir * s<v> * s<u> indexes the Psi buffers;
 *   - Psi_<r><p>_E<u>[ip] becomes p0e<p>[ir] * (old value) + p1e<p>[ir] * dH<v><r>,
 *     and Psi_<r><p>_E<v>[ip] likewise with dH<u><r>;
 *   - the E<u> Psi value is applied to pE<u>i with sign `se` and the E<v> Psi
 *     value to pE<v>i with sign `sh`: (-, +) when r is not y, (+, -) when it is.
 */
{% for r, p in pmls -%}
{%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
{%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
{%- if r != 'y' -%}
{%- set se, sh = '-', '+' -%}
{%- else -%}
{%- set se, sh = '+', '-' -%}
{%- endif -%}
{%- if p == 'n' %}
if ( {{r}} < pml_thickness ) {
const size_t ir = {{r}}; // index into pml parameters
{%- elif p == 'p' %}
if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters
{%- endif %}
const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi
{{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
{{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
pE{{u}}i {{se}}= {{psi ~ u}}[ip];
pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
}
{%- endfor %}
/*
* Update E
*
* Read-modify-write of entry i: each of .x/.y/.z gains dt / eps times
* (a difference of two H differences plus the matching PML term).
*/
float4 E__ = vload4(i, E);
E__.x += dt / eps__.x * (dHzy - dHyz + pExi);
E__.y += dt / eps__.y * (dHxz - dHzx + pEyi);
E__.z += dt / eps__.z * (dHyx - dHxy + pEzi);
vstore4(E__, i, E);
}
}