//CL//

/*
 *  E-field update step, including any PML contributions.
 *
 *  This file is a template: it must be rendered before it is valid OpenCL C.
 *
 *  Template parameters:
 *   common_header: Rendered contents of common.cl (pasted inside the work loop)
 *   pmls: list of (axis, polarity) pairs, e.g. [('x', 'n'), ('z', 'p'), ...]
 *   pml_thickness: Number of cells (integer)
 *
 *  OpenCL args (per the parameter list below):
 *   E, H, dt, eps, n
 *
 *  NOTE(review): the PML code also reads/writes arrays named p0e{np}, p1e{np}
 *   and Psi_{xyz}{np}_E{xyz}; they do not appear in the parameter list below.
 *   Presumably they still have to be added as kernel arguments -- confirm
 *   against the host code.
 */

#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable

/* Image accessors. The image built-ins are given the coordinates as (z, y, x). */
#define read_field(E, x, y, z) read_imagef(E, sampler, (int4)(z, y, x, 0))
#define write_field(E, x, y, z, E__) write_imagef(E, (int4)(z, y, x, 0), E__)

/* Unnormalized coordinates, no address-mode handling, nearest filtering. */
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;

/*
 * update_e: add dt / eps * (differences of H + PML terms) to E.
 *
 *  E    float buffer, accessed four floats at a time (vload4 / vstore4 at index i)
 *  H    3D image sampled through read_field
 *  dt   scalar factor applied to every update
 *  eps  3D image sampled through read_field; its x/y/z components divide dt
 *  n    upper bound (exclusive) for the work index i
 *
 * NOTE(review): the OpenCL C specification restricts size_t as a kernel
 *  argument type; the type of n is left untouched here because the host side
 *  is not visible -- confirm.
 */
__kernel void update_e(
        __global float* E,
        __read_only image3d_t H,
        const float dt,
        __read_only image3d_t eps,
        const size_t n
        )
{
    /*
     * Each work-item begins at (local size * group id + local id) and then
     * advances by the global size until it reaches n. The index keeps the
     * name i because the common header pasted below may refer to it.
     */
    for (size_t i = get_local_size(0) * get_group_id(0) + get_local_id(0);
         i < n;
         i += get_global_size(0)) {

        /*
         * x, y, z, mx, my, mz and sx, sy, sz are used below but are not
         * defined in this file -- presumably the common header provides them.
         */
        {{common_header | indent(8, False)}}

        ////////////////////////////////////////////////////////////////////////////

        /* Samples at the current cell and at the three offset cells. */
        float4 eps_c = read_field(eps, x, y, z);
        float4 H_c = read_field(H, x, y, z);
        float4 H_mx = read_field(H, x + mx, y, z);
        float4 H_my = read_field(H, x, y + my, z);
        float4 H_mz = read_field(H, x, y, z + mz);

        {% if pmls -%}
        const int pml_thickness = {{pml_thickness}};
        {%- endif %}

        /*
         *   Precalculate derivatives
         *
         * dH<c><d> is component c of H at this cell minus the same component
         * at the cell offset along d. The names are assembled by the PML
         * template code below, so the naming pattern must stay as it is.
         */
        float dHxy = H_c.x - H_my.x;
        float dHxz = H_c.x - H_mz.x;

        float dHyx = H_c.y - H_mx.y;
        float dHyz = H_c.y - H_mz.y;

        float dHzx = H_c.z - H_mx.z;
        float dHzy = H_c.z - H_my.z;

        /*
         *   PML Update
         *
         * pE?i accumulate the PML effects on each E component; they remain
         * zero when no PML block applies to this cell.
         */
        float pExi = 0.0f;
        float pEyi = 0.0f;
        float pEzi = 0.0f;

        /*
         * One conditional block is emitted per (axis r, polarity p) pair.
         * u and v are the two axes other than r. The signs applied to the
         * u / v accumulators are + / - when r is 'y' and - / + otherwise.
         */
{% for r, p in pmls -%}
    {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
    {%- set psi_base = 'Psi_' ~ r ~ p ~ '_E' -%}
    {%- if r == 'y' -%}
        {%- set sign_u, sign_v = '+', '-' -%}
    {%- else -%}
        {%- set sign_u, sign_v = '-', '+' -%}
    {%- endif -%}

    {%- if p == 'n' %}

        if ( {{r}} < pml_thickness ) {
            /* index into pml parameters */
            const size_t pml_idx = {{r}};

    {%- elif p == 'p' %}

        if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
            /* index into pml parameters (counted from the far end of the axis) */
            const size_t pml_idx = (s{{r}} - 1) - {{r}};

    {%- endif %}

            /* linear index into Psi */
            const size_t psi_idx = {{v}} + {{u}} * s{{v}} + pml_idx * s{{v}} * s{{u}};

            {{psi_base ~ u}}[psi_idx] = p0e{{p}}[pml_idx] * {{psi_base ~ u}}[psi_idx] + p1e{{p}}[pml_idx] * dH{{v ~ r}};
            {{psi_base ~ v}}[psi_idx] = p0e{{p}}[pml_idx] * {{psi_base ~ v}}[psi_idx] + p1e{{p}}[pml_idx] * dH{{u ~ r}};

            pE{{u}}i {{sign_u}}= {{psi_base ~ u}}[psi_idx];
            pE{{v}}i {{sign_v}}= {{psi_base ~ v}}[psi_idx];

        }
{%- endfor %}

        /*
         *  Update E
         *
         * Only the x, y and z lanes are modified; the fourth lane of the
         * loaded vector is written back unchanged.
         */
        const float rhs_x = dHzy - dHyz + pExi;
        const float rhs_y = dHxz - dHzx + pEyi;
        const float rhs_z = dHyx - dHxy + pEzi;

        float4 E_c = vload4(i, E);

        E_c.x += dt / eps_c.x * rhs_x;
        E_c.y += dt / eps_c.y * rhs_y;
        E_c.z += dt / eps_c.z * rhs_z;

        vstore4(E_c, i, E);
    }
}