You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
112 lines
3.3 KiB
Common Lisp
112 lines
3.3 KiB
Common Lisp
//CL//
/*
 *  Update E-field, including any PMLs.
 *
 *  Template parameters:
 *   common_header: Rendered contents of common.cl
 *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
 *   pml_thickness: Number of cells (integer)
 *
 *  OpenCL args:
 *   E, H, dt, eps, n
 *   (the PML buffers p{01}e{np} and Psi_{xyz}{np}_E are referenced by the
 *   kernel body but are not declared in the kernel's parameter list)
 */
|
|
|
|
/* Opt in to the cl_khr_3d_image_writes extension (write_imagef on 3D images). */
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable

/*
 * Field accessors for 3D images.
 *
 * Callers pass indices in (x, y, z) order; the image itself is addressed as
 * (z, y, x, 0), i.e. z becomes the first image coordinate.
 * read_field relies on the program-scope `sampler` object.
 */
#define read_field(img, ix, iy, iz) \
    read_imagef((img), sampler, (int4)((iz), (iy), (ix), 0))
#define write_field(img, ix, iy, iz, val) \
    write_imagef((img), (int4)((iz), (iy), (ix), 0), (val))
|
|
|
|
|
|
/*
 * Program-scope sampler used by read_field(): unnormalized (integer)
 * coordinates, nearest filtering, and no out-of-range address handling.
 *
 * NOTE(review): with CLK_ADDRESS_NONE the result of reading outside the image
 * is undefined per the OpenCL specification, so the neighbour offsets used by
 * the kernels presumably always stay inside the image -- confirm in common.cl.
 */
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE
                        | CLK_ADDRESS_NONE
                        | CLK_FILTER_NEAREST;
|
|
|
|
|
|
/*
 * update_e -- advance the E-field by one time step.
 *
 * For every processed cell index i, each component of E is incremented by
 *     dt / eps * (difference of two H finite differences + PML term)
 * H and eps are read through read_field(); E is read and written as one
 * packed float4 per cell (vload4 / vstore4 at index i).
 *
 * Arguments:
 *   E    float buffer, accessed as float4 element i
 *   H    read-only 3D image, sampled at (x, y, z) and at the neighbours
 *        offset by mx / my / mz along x / y / z
 *   dt   scalar factor applied to the update
 *   eps  read-only 3D image; its .x/.y/.z divide the update of E.x/E.y/E.z
 *   n    loop bound: indices i >= n are not processed
 *
 * Work distribution: each work-item starts at
 * get_local_size(0) * get_group_id(0) + get_local_id(0) and advances by
 * get_global_size(0) until it reaches n.
 *
 * NOTE(review): x, y, z, sx, sy, sz, mx, my and mz are not declared in this
 * file; presumably the rendered common_header derives them from i -- confirm.
 * NOTE(review): the PML section uses p0e / p1e coefficient arrays and Psi_*
 * arrays that are not in the parameter list below, so rendering with a
 * non-empty pmls list presumably fails to compile until they are added --
 * confirm against the host code before enabling PMLs.
 */
__kernel void update_e(
        __global float* E,
        __read_only image3d_t H,
        const float dt,
        __read_only image3d_t eps,
        const size_t n
        ) {

    size_t lid = get_local_id(0);
    size_t gsize = get_global_size(0);
    size_t work_group_start = get_local_size(0) * get_group_id(0);
    size_t i;

    for (i = work_group_start + lid; i < n; i += gsize) {

        /*
         * Rendered common.cl is pasted here, inside the loop body. The tag is
         * already indented, so indent(8, False) leaves the first line alone
         * and indents the remaining lines by 8 spaces.
         */
        {{common_header | indent(8, False)}}

        ////////////////////////////////////////////////////////////////////////////

        /* Material and field values at this cell and at its three neighbours. */
        float4 eps__ = read_field(eps, x, y, z);
        float4 H__ = read_field(H, x, y, z);
        float4 Hmx = read_field(H, x + mx, y, z);
        float4 Hmy = read_field(H, x, y + my, z);
        float4 Hmz = read_field(H, x, y, z + mz);

        {% if pmls -%}
        const int pml_thickness = {{pml_thickness}};
        {%- endif %}

        /*
         *  Precalculate derivatives
         *
         *  dHab is H.a at this cell minus H.a at the neighbour along axis b.
         */
        float dHxy = H__.x - Hmy.x;
        float dHxz = H__.x - Hmz.x;

        float dHyx = H__.y - Hmx.y;
        float dHyz = H__.y - Hmz.y;

        float dHzx = H__.z - Hmx.z;
        float dHzy = H__.z - Hmy.z;

        /*
         *  PML Update
         *
         *  One block is emitted per (axis r, polarity p) entry of pmls; u and v
         *  are the two remaining axes. Polarity 'n' covers cells whose r index
         *  is below pml_thickness; polarity 'p' covers the last pml_thickness
         *  cells below the axis size. In both cases ir is 0 at the boundary
         *  cell and grows inward.
         *  Each Psi entry is updated as p0e * Psi + p1e * dH and then
         *  accumulated into the matching pE term; the two components get
         *  opposite signs, and the signs are swapped when r is 'y'.
         */
        // PML effects on E
        float pExi = 0;
        float pEyi = 0;
        float pEzi = 0;

        {% for r, p in pmls -%}
            {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
            {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
            {%- if r != 'y' -%}
                {%- set se, sh = '-', '+' -%}
            {%- else -%}
                {%- set se, sh = '+', '-' -%}
            {%- endif -%}

            {%- if p == 'n' %}

        if ( {{r}} < pml_thickness ) {
            const size_t ir = {{r}};                          // index into pml parameters

            {%- elif p == 'p' %}

        if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
            const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters

            {%- endif %}
            const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
            {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
            {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
            pE{{u}}i {{se}}= {{psi ~ u}}[ip];
            pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
        }
        {%- endfor %}


        /*
         *  Update E
         *
         *  Load the packed float4 for cell i, add the scaled H differences and
         *  PML contributions per component, and store it back in place.
         */
        float4 E__ = vload4(i, E);
        E__.x += dt / eps__.x * (dHzy - dHyz + pExi);
        E__.y += dt / eps__.y * (dHxz - dHzx + pEyi);
        E__.z += dt / eps__.z * (dHyx - dHxy + pEzi);
        vstore4(E__, i, E);
    }
}
|