diff --git a/fdtd/kernels/update_e_full.cl b/fdtd/kernels/update_e_full.cl new file mode 100644 index 0000000..c9bdbef --- /dev/null +++ b/fdtd/kernels/update_e_full.cl @@ -0,0 +1,111 @@ +//CL// +/* + * Update E-field, including any PMLs. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities + * pml_thickness: Number of cells (integer) + * + * OpenCL args: + * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] + */ + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#define read_field(E, x, y, z) read_imagef(E, sampler, (int4)(z, y, x, 0)) +#define write_field(E, x, y, z, E__) write_imagef(E, (int4)(z, y, x, 0), E__) + + +const sampler_t sampler = CLK_FILTER_NEAREST | \ + CLK_NORMALIZED_COORDS_FALSE | \ + CLK_ADDRESS_NONE; + + +__kernel void update_e( + __global float* E, + __read_only image3d_t H, + const float dt, + __read_only image3d_t eps, + const size_t n + ) { + + size_t lid = get_local_id(0); + size_t gsize = get_global_size(0); + size_t work_group_start = get_local_size(0) * get_group_id(0); + size_t i; + + for (i = work_group_start + lid; i < n; i += gsize) { + + {{common_header | indent(8, False)}} + + //////////////////////////////////////////////////////////////////////////// + + float4 eps__ = read_field(eps, x, y, z); + float4 H__ = read_field(H, x, y, z); + float4 Hmx = read_field(H, x + mx, y, z); + float4 Hmy = read_field(H, x, y + my, z); + float4 Hmz = read_field(H, x, y, z + mz); + + {% if pmls -%} + const int pml_thickness = {{pml_thickness}}; + {%- endif %} + + /* + * Precalclate derivatives + */ + float dHxy = H__.x - Hmy.x; + float dHxz = H__.x - Hmz.x; + + float dHyx = H__.y - Hmx.y; + float dHyz = H__.y - Hmz.y; + + float dHzx = H__.z - Hmx.z; + float dHzy = H__.z - Hmy.z; + + /* + * PML Update + */ + // PML effects on E + float pExi = 0; + float pEyi = 0; + float pEzi = 0; + + {% for r, p in pmls -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} + {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%} + {%- if r != 'y' -%} + {%- set se, sh = '-', '+' -%} + {%- else -%} + {%- set se, sh = '+', '-' -%} + {%- endif -%} + + {%- if p == 'n' %} + + if ( {{r}} < pml_thickness ) { + const size_t ir = {{r}}; // index into pml parameters + + {%- elif p == 'p' %} + + if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { + const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters + + {%- endif %} + const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}}; + {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}}; + pE{{u}}i {{se}}= {{psi ~ u}}[ip]; + pE{{v}}i {{sh}}= {{psi ~ v}}[ip]; + } + {%- endfor %} + + + /* + * Update E + */ + float4 E__ = vload4(i, E); + E__.x += dt / eps__.x * (dHzy - dHyz + pExi); + E__.y += dt / eps__.y * (dHxz - dHzx + pEyi); + E__.z += dt / eps__.z * (dHyx - dHxy + pEzi); + vstore4(E__, i, E); + } +} diff --git a/fdtd/kernels/update_h_full.cl b/fdtd/kernels/update_h_full.cl new file mode 100644 index 0000000..bf0d1da --- /dev/null +++ b/fdtd/kernels/update_h_full.cl @@ -0,0 +1,139 @@ +//CL// +/* + * Update H-field, including any PMLs. + * Also precalculate values for poynting vector if necessary. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities + * pml_thickness: Number of cells (integer) + * do_poynting: Whether to precalculate poynting vector components (boolean) + * + * OpenCL args: + * E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS] + */ + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#define read_field(E, x, y, z) read_imagef(E, sampler, (int4)(z, y, x, 0)) +#define write_field(E, x, y, z, E__) write_imagef(E, (int4)(z, y, x, 0), E__) + + +const sampler_t sampler = CLK_FILTER_NEAREST | \ + CLK_NORMALIZED_COORDS_FALSE | \ + CLK_ADDRESS_NONE; + + +__kernel void update_h( + __read_only image3d_t E, + __global float* H, + const float dt, + const size_t n + {% if pmls -%} + __global float* S, + {%- endif %} + ) { + + size_t lid = get_local_id(0); + size_t gsize = get_global_size(0); + size_t work_group_start = get_local_size(0) * get_group_id(0); + size_t i; + + for (i = work_group_start + lid; i < n; i += gsize) { + + {{common_header | indent(8, False)}} + + //////////////////////////////////////////////////////////////////////////// + + float4 E__ = read_field(E, x, y, z); + float4 Epx = read_field(E, x + px, y, z); + float4 Epy = read_field(E, x, y + py, z); + float4 Epz = read_field(E, x, y, z + pz); + + {% if pmls -%} + const int pml_thickness = {{pml_thickness}}; + {%- endif %} + + /* + * Precalculate derivatives + */ + float dExy = Epy.x - E__.x; + float dExz = Epz.x - E__.x; + + float dEyx = Epx.y - E__.y; + float dEyz = Epz.y - E__.y; + + float dEzx = Epx.z - E__.z; + float dEzy = Epy.z - E__.z; + + + /* + * PML Update + */ + // PML contributions to H + float pHxi = 0; + float pHyi = 0; + float pHzi = 0; + + {%- for r, p in pmls -%} + {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%} + {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%} + {%- if r != 'y' -%} + {%- set se, sh = '-', '+' -%} + {%- else -%} + {%- set se, sh = '+', '-' -%} + {%- endif -%} + + {%- if p == 'n' %} + + if ( {{r}} < pml_thickness ) { + const size_t ir = {{r}}; // index into pml parameters + + {%- elif p == 'p' %} + + if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) { + const size_t ir = (s{{r}} - 1) - {{r}}; // index into pml parameters + + {%- endif %} + const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}}; // linear index into Psi + {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}}; + {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}}; + pH{{u}}i {{sh}}= {{psi ~ u}}[ip]; + pH{{v}}i {{se}}= {{psi ~ v}}[ip]; + } + {%- endfor %} + + /* + * Update H + */ + {% if do_poynting -%} + // Save old H for averaging + float Hx_old = Hx[i]; + float Hy_old = Hy[i]; + float Hz_old = Hz[i]; + {%- endif %} + + // H update equations + float4 H__ = vload4(i, H); + H__.x -= dt * (dEzy - dEyz + pHxi); + H__.y -= dt * (dExz - dEzx + pHyi); + H__.z -= dt * (dEyx - dExy + pHzi); + vstore4(H__, i, H); + + {% if do_poynting -%} + // Average H across timesteps + float aHxt = Hx[i] + Hx_old; + float aHyt = Hy[i] + Hy_old; + float aHzt = Hz[i] + Hz_old; + + /* + * Calculate unscaled S components at H locations + */ + float4 S__; + S__.x = Epx.y * aHzt - Epx.z * aHyt; + S__.y = Epy.z * aHxt - Epy.x * aHzt; + S__.z = Epz.x * aHyt - Epz.y * aHxt; + + write_field(S, x, y, z, S__); + {%- endif -%} + } +} diff --git a/opencl_fdtd/version.py b/opencl_fdtd/version.py new file mode 100644 index 0000000..e69de29 diff --git a/saved_simulation b/saved_simulation new file mode 100644 index 0000000..5f1e18b Binary files /dev/null and b/saved_simulation differ diff --git a/sources.c b/sources.c new file mode 100644 index 0000000..14071ad --- /dev/null +++ b/sources.c @@ -0,0 +1,565 @@ +/* + * Update E-field, including any PMLs. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing + axes, polarities, and thicknesses. + * + * OpenCL args: + * E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E] + */ + + + +typedef float ftype; + + +/* + * Field size info + */ +const size_t sx = 395; +const size_t sy = 345; +const size_t sz = 73; +const size_t field_size = sx * sy * sz; + +//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if +// i is outside the bounds of Ex[]. +if (i >= field_size) { + PYOPENCL_ELWISE_CONTINUE; +} + + +/* + * Array indexing + */ +// Given a linear index i and shape (sx, sy, sz), defines x, y, and z +// as the 3D indices of the current element (i). +// (ie, converts linear index [i] to field indices (x, y, z) +const size_t x = i / (sz * sy); +const size_t y = (i - x * sz * sy) / sz; +const size_t z = (i - y * sz - x * sz * sy); + +// Calculate linear index offsets corresponding to offsets in 3D +// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z) +const size_t dix = sz * sy; +const size_t diy = sz; +const size_t diz = 1; + + +/* + * Pointer math + */ +//Pointer offsets into the components of a linearized vector-field +// (eg. Hx = H + XX, where H and Hx are pointers) +const size_t XX = 0; +const size_t YY = field_size; +const size_t ZZ = field_size * 2; + +//Define pointers to vector components of each field (eg. Hx = H + XX) +__global ftype *Ex = E + XX; +__global ftype *Ey = E + YY; +__global ftype *Ez = E + ZZ; + +__global ftype *Hx = H + XX; +__global ftype *Hy = H + YY; +__global ftype *Hz = H + ZZ; + + +/* + * Implement periodic boundary conditions + * + * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction. + * In the event that we start at x == 0, we actually want to wrap around and grab the cell + * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix . + * + * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction. + * In the event that we start at x == (sx - 1), we actually want to wrap around and grab + * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . + */ + +int mx = -dix; +int px = +dix; +int wrap_x = (sx - 1) * dix; +if ( x == 0 ) { + mx = wrap_x; +} else if ( x == sx - 1 ) { + px = -wrap_x; +} + +int my = -diy; +int py = +diy; +int wrap_y = (sy - 1) * diy; +if ( y == 0 ) { + my = wrap_y; +} else if ( y == sy - 1 ) { + py = -wrap_y; +} + +int mz = -diz; +int pz = +diz; +int wrap_z = (sz - 1) * diz; +if ( z == 0 ) { + mz = wrap_z; +} else if ( z == sz - 1 ) { + pz = -wrap_z; +} + + +//////////////////////////////////////////////////////////////////////////// + +__global ftype *epsx = eps + XX; +__global ftype *epsy = eps + YY; +__global ftype *epsz = eps + ZZ; + + +/* + * Precalclate derivatives + */ +ftype dHxy = Hx[i] - Hx[i + my]; +ftype dHxz = Hx[i] - Hx[i + mz]; + +ftype dHyx = Hy[i] - Hy[i + mx]; +ftype dHyz = Hy[i] - Hy[i + mz]; + +ftype dHzx = Hz[i] - Hz[i + mx]; +ftype dHzy = Hz[i] - Hz[i + my]; + +/* + * PML Update + */ +// PML effects on E +ftype pExi = 0; +ftype pEyi = 0; +ftype pEzi = 0; + +int pml_xn_thickness = 8; + +if ( x < pml_xn_thickness ) { + const size_t ir = x; // index into pml parameters + const size_t ip = z + y * sz + ir * sz * sy; // linear index into Psi + Psi_xn_Ey[ip] = px0en[ir] * Psi_xn_Ey[ip] + px1en[ir] * dHzx; + Psi_xn_Ez[ip] = px0en[ir] * Psi_xn_Ez[ip] + px1en[ir] * dHyx; + pEyi -= Psi_xn_Ey[ip]; + pEzi += Psi_xn_Ez[ip]; +}int pml_xp_thickness = 8; + +if ( sx > x && x >= sx - pml_xp_thickness ) { + const size_t ir = (sx - 1) - x; // index into pml parameters + const size_t ip = z + y * sz + ir * sz * sy; // linear index into Psi + Psi_xp_Ey[ip] = px0ep[ir] * Psi_xp_Ey[ip] + px1ep[ir] * dHzx; + Psi_xp_Ez[ip] = px0ep[ir] * Psi_xp_Ez[ip] + px1ep[ir] * dHyx; + pEyi -= Psi_xp_Ey[ip]; + pEzi += Psi_xp_Ez[ip]; +}int pml_yn_thickness = 8; + +if ( y < pml_yn_thickness ) { + const size_t ir = y; // index into pml parameters + const size_t ip = z + x * sz + ir * sz * sx; // linear index into Psi + Psi_yn_Ex[ip] = py0en[ir] * Psi_yn_Ex[ip] + py1en[ir] * dHzy; + Psi_yn_Ez[ip] = py0en[ir] * Psi_yn_Ez[ip] + py1en[ir] * dHxy; + pExi += Psi_yn_Ex[ip]; + pEzi -= Psi_yn_Ez[ip]; +}int pml_yp_thickness = 8; + +if ( sy > y && y >= sy - pml_yp_thickness ) { + const size_t ir = (sy - 1) - y; // index into pml parameters + const size_t ip = z + x * sz + ir * sz * sx; // linear index into Psi + Psi_yp_Ex[ip] = py0ep[ir] * Psi_yp_Ex[ip] + py1ep[ir] * dHzy; + Psi_yp_Ez[ip] = py0ep[ir] * Psi_yp_Ez[ip] + py1ep[ir] * dHxy; + pExi += Psi_yp_Ex[ip]; + pEzi -= Psi_yp_Ez[ip]; +}int pml_zn_thickness = 8; + +if ( z < pml_zn_thickness ) { + const size_t ir = z; // index into pml parameters + const size_t ip = y + x * sy + ir * sy * sx; // linear index into Psi + Psi_zn_Ex[ip] = pz0en[ir] * Psi_zn_Ex[ip] + pz1en[ir] * dHyz; + Psi_zn_Ey[ip] = pz0en[ir] * Psi_zn_Ey[ip] + pz1en[ir] * dHxz; + pExi -= Psi_zn_Ex[ip]; + pEyi += Psi_zn_Ey[ip]; +}int pml_zp_thickness = 8; + +if ( sz > z && z >= sz - pml_zp_thickness ) { + const size_t ir = (sz - 1) - z; // index into pml parameters + const size_t ip = y + x * sy + ir * sy * sx; // linear index into Psi + Psi_zp_Ex[ip] = pz0ep[ir] * Psi_zp_Ex[ip] + pz1ep[ir] * dHyz; + Psi_zp_Ey[ip] = pz0ep[ir] * Psi_zp_Ey[ip] + pz1ep[ir] * dHxz; + pExi -= Psi_zp_Ex[ip]; + pEyi += Psi_zp_Ey[ip]; +} + +/* + * Update E + */ +Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi); +Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi); +Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi); +========================================== +/* + * Update H-field, including any PMLs. + * Also precalculate values for poynting vector if necessary. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing + axes, polarities, and thicknesses. + * do_poynting: Whether to precalculate poynting vector components (boolean) + * + * OpenCL args: + * E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] + */ + + + +typedef float ftype; + + +/* + * Field size info + */ +const size_t sx = 395; +const size_t sy = 345; +const size_t sz = 73; +const size_t field_size = sx * sy * sz; + +//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if +// i is outside the bounds of Ex[]. +if (i >= field_size) { + PYOPENCL_ELWISE_CONTINUE; +} + + +/* + * Array indexing + */ +// Given a linear index i and shape (sx, sy, sz), defines x, y, and z +// as the 3D indices of the current element (i). +// (ie, converts linear index [i] to field indices (x, y, z) +const size_t x = i / (sz * sy); +const size_t y = (i - x * sz * sy) / sz; +const size_t z = (i - y * sz - x * sz * sy); + +// Calculate linear index offsets corresponding to offsets in 3D +// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z) +const size_t dix = sz * sy; +const size_t diy = sz; +const size_t diz = 1; + + +/* + * Pointer math + */ +//Pointer offsets into the components of a linearized vector-field +// (eg. Hx = H + XX, where H and Hx are pointers) +const size_t XX = 0; +const size_t YY = field_size; +const size_t ZZ = field_size * 2; + +//Define pointers to vector components of each field (eg. Hx = H + XX) +__global ftype *Ex = E + XX; +__global ftype *Ey = E + YY; +__global ftype *Ez = E + ZZ; + +__global ftype *Hx = H + XX; +__global ftype *Hy = H + YY; +__global ftype *Hz = H + ZZ; + + +/* + * Implement periodic boundary conditions + * + * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction. + * In the event that we start at x == 0, we actually want to wrap around and grab the cell + * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix . + * + * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction. + * In the event that we start at x == (sx - 1), we actually want to wrap around and grab + * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . + */ + +int mx = -dix; +int px = +dix; +int wrap_x = (sx - 1) * dix; +if ( x == 0 ) { + mx = wrap_x; +} else if ( x == sx - 1 ) { + px = -wrap_x; +} + +int my = -diy; +int py = +diy; +int wrap_y = (sy - 1) * diy; +if ( y == 0 ) { + my = wrap_y; +} else if ( y == sy - 1 ) { + py = -wrap_y; +} + +int mz = -diz; +int pz = +diz; +int wrap_z = (sz - 1) * diz; +if ( z == 0 ) { + mz = wrap_z; +} else if ( z == sz - 1 ) { + pz = -wrap_z; +} + + +//////////////////////////////////////////////////////////////////////////// + +/* + * Precalculate derivatives + */ +ftype dExy = Ex[i + py] - Ex[i]; +ftype dExz = Ex[i + pz] - Ex[i]; + +ftype dEyx = Ey[i + px] - Ey[i]; +ftype dEyz = Ey[i + pz] - Ey[i]; + +ftype dEzx = Ez[i + px] - Ez[i]; +ftype dEzy = Ez[i + py] - Ez[i]; + + +/* + * Precalculate averaged E + */ +ftype aExy = Ex[i + py] + Ex[i]; +ftype aExz = Ex[i + pz] + Ex[i]; + +ftype aEyx = Ey[i + px] + Ey[i]; +ftype aEyz = Ey[i + pz] + Ey[i]; + +ftype aEzx = Ez[i + px] + Ez[i]; +ftype aEzy = Ez[i + py] + Ez[i]; + + +/* + * PML Update + */ +// PML contributions to H +ftype pHxi = 0; +ftype pHyi = 0; +ftype pHzi = 0; + +int pml_xn_thickness = 8; + +if ( x < pml_xn_thickness ) { + const size_t ir = x; // index into pml parameters + const size_t ip = z + y * sz + ir * sz * sy; // linear index into Psi + Psi_xn_Hy[ip] = px0hn[ir] * Psi_xn_Hy[ip] + px1hn[ir] * dEzx; + Psi_xn_Hz[ip] = px0hn[ir] * Psi_xn_Hz[ip] + px1hn[ir] * dEyx; + pHyi += Psi_xn_Hy[ip]; + pHzi -= Psi_xn_Hz[ip]; +}int pml_xp_thickness = 8; + +if ( sx > x && x >= sx - pml_xp_thickness ) { + const size_t ir = (sx - 1) - x; // index into pml parameters + const size_t ip = z + y * sz + ir * sz * sy; // linear index into Psi + Psi_xp_Hy[ip] = px0hp[ir] * Psi_xp_Hy[ip] + px1hp[ir] * dEzx; + Psi_xp_Hz[ip] = px0hp[ir] * Psi_xp_Hz[ip] + px1hp[ir] * dEyx; + pHyi += Psi_xp_Hy[ip]; + pHzi -= Psi_xp_Hz[ip]; +}int pml_yn_thickness = 8; + +if ( y < pml_yn_thickness ) { + const size_t ir = y; // index into pml parameters + const size_t ip = z + x * sz + ir * sz * sx; // linear index into Psi + Psi_yn_Hx[ip] = py0hn[ir] * Psi_yn_Hx[ip] + py1hn[ir] * dEzy; + Psi_yn_Hz[ip] = py0hn[ir] * Psi_yn_Hz[ip] + py1hn[ir] * dExy; + pHxi -= Psi_yn_Hx[ip]; + pHzi += Psi_yn_Hz[ip]; +}int pml_yp_thickness = 8; + +if ( sy > y && y >= sy - pml_yp_thickness ) { + const size_t ir = (sy - 1) - y; // index into pml parameters + const size_t ip = z + x * sz + ir * sz * sx; // linear index into Psi + Psi_yp_Hx[ip] = py0hp[ir] * Psi_yp_Hx[ip] + py1hp[ir] * dEzy; + Psi_yp_Hz[ip] = py0hp[ir] * Psi_yp_Hz[ip] + py1hp[ir] * dExy; + pHxi -= Psi_yp_Hx[ip]; + pHzi += Psi_yp_Hz[ip]; +}int pml_zn_thickness = 8; + +if ( z < pml_zn_thickness ) { + const size_t ir = z; // index into pml parameters + const size_t ip = y + x * sy + ir * sy * sx; // linear index into Psi + Psi_zn_Hx[ip] = pz0hn[ir] * Psi_zn_Hx[ip] + pz1hn[ir] * dEyz; + Psi_zn_Hy[ip] = pz0hn[ir] * Psi_zn_Hy[ip] + pz1hn[ir] * dExz; + pHxi += Psi_zn_Hx[ip]; + pHyi -= Psi_zn_Hy[ip]; +}int pml_zp_thickness = 8; + +if ( sz > z && z >= sz - pml_zp_thickness ) { + const size_t ir = (sz - 1) - z; // index into pml parameters + const size_t ip = y + x * sy + ir * sy * sx; // linear index into Psi + Psi_zp_Hx[ip] = pz0hp[ir] * Psi_zp_Hx[ip] + pz1hp[ir] * dEyz; + Psi_zp_Hy[ip] = pz0hp[ir] * Psi_zp_Hy[ip] + pz1hp[ir] * dExz; + pHxi += Psi_zp_Hx[ip]; + pHyi -= Psi_zp_Hy[ip]; +} + +/* + * Update H + */ +// Save old H for averaging +ftype Hx_old = Hx[i]; +ftype Hy_old = Hy[i]; +ftype Hz_old = Hz[i]; + +// H update equations +Hx[i] -= dt * (dEzy - dEyz + pHxi); +Hy[i] -= dt * (dExz - dEzx + pHyi); +Hz[i] -= dt * (dEyx - dExy + pHzi); + +// Average H across timesteps +ftype aHxt = Hx[i] + Hx_old; +ftype aHyt = Hy[i] + Hy_old; +ftype aHzt = Hz[i] + Hz_old; + +/* + * Calculate unscaled S components at H locations + */ +__global ftype *oSxy = oS + 0 * field_size; +__global ftype *oSyz = oS + 1 * field_size; +__global ftype *oSzx = oS + 2 * field_size; +__global ftype *oSxz = oS + 3 * field_size; +__global ftype *oSyx = oS + 4 * field_size; +__global ftype *oSzy = oS + 5 * field_size; + +oSxy[i] = aEyx * aHzt; +oSxz[i] = -aEzx * aHyt; +oSyz[i] = aEzy * aHxt; +oSyx[i] = -aExy * aHzt; +oSzx[i] = aExz * aHyt; +oSzy[i] = -aEyz * aHxt; +========================================== +/* + * Update E-field, including any PMLs. + * + * Template parameters: + * common_header: Rendered contents of common.cl + * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities + * pml_thickness: Number of cells (integer) + * + * OpenCL args: + * E, H, dt, S, oS + */ + + + +typedef float ftype; + + +/* + * Field size info + */ +const size_t sx = 395; +const size_t sy = 345; +const size_t sz = 73; +const size_t field_size = sx * sy * sz; + +//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if +// i is outside the bounds of Ex[]. +if (i >= field_size) { + PYOPENCL_ELWISE_CONTINUE; +} + + +/* + * Array indexing + */ +// Given a linear index i and shape (sx, sy, sz), defines x, y, and z +// as the 3D indices of the current element (i). +// (ie, converts linear index [i] to field indices (x, y, z) +const size_t x = i / (sz * sy); +const size_t y = (i - x * sz * sy) / sz; +const size_t z = (i - y * sz - x * sz * sy); + +// Calculate linear index offsets corresponding to offsets in 3D +// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z) +const size_t dix = sz * sy; +const size_t diy = sz; +const size_t diz = 1; + + +/* + * Pointer math + */ +//Pointer offsets into the components of a linearized vector-field +// (eg. Hx = H + XX, where H and Hx are pointers) +const size_t XX = 0; +const size_t YY = field_size; +const size_t ZZ = field_size * 2; + +//Define pointers to vector components of each field (eg. Hx = H + XX) +__global ftype *Ex = E + XX; +__global ftype *Ey = E + YY; +__global ftype *Ez = E + ZZ; + +__global ftype *Hx = H + XX; +__global ftype *Hy = H + YY; +__global ftype *Hz = H + ZZ; + + +/* + * Implement periodic boundary conditions + * + * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction. + * In the event that we start at x == 0, we actually want to wrap around and grab the cell + * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix . + * + * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction. + * In the event that we start at x == (sx - 1), we actually want to wrap around and grab + * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . + */ + +int mx = -dix; +int px = +dix; +int wrap_x = (sx - 1) * dix; +if ( x == 0 ) { + mx = wrap_x; +} else if ( x == sx - 1 ) { + px = -wrap_x; +} + +int my = -diy; +int py = +diy; +int wrap_y = (sy - 1) * diy; +if ( y == 0 ) { + my = wrap_y; +} else if ( y == sy - 1 ) { + py = -wrap_y; +} + +int mz = -diz; +int pz = +diz; +int wrap_z = (sz - 1) * diz; +if ( z == 0 ) { + mz = wrap_z; +} else if ( z == sz - 1 ) { + pz = -wrap_z; +} + + +////////////////////////////////////////////////////////////////////// + + +/* + * Calculate S from oS (pre-calculated components) + */ +__global ftype *Sx = S + XX; +__global ftype *Sy = S + YY; +__global ftype *Sz = S + ZZ; + +// Use unscaled S components from H locations +__global ftype *oSxy = oS + 0 * field_size; +__global ftype *oSyz = oS + 1 * field_size; +__global ftype *oSzx = oS + 2 * field_size; +__global ftype *oSxz = oS + 3 * field_size; +__global ftype *oSyx = oS + 4 * field_size; +__global ftype *oSzy = oS + 5 * field_size; + +ftype s_factor = dt * 0.125; +Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor; +Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor; +Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor; \ No newline at end of file