snapshot 2022-11-24 22:29:08.023001
This commit is contained in:
		
							parent
							
								
									689b3176cc
								
							
						
					
					
						commit
						5805f24ece
					
				
							
								
								
									
										111
									
								
								fdtd/kernels/update_e_full.cl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								fdtd/kernels/update_e_full.cl
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,111 @@
 | 
			
		||||
//CL//
 | 
			
		||||
/*
 | 
			
		||||
 *  Update E-field, including any PMLs.
 | 
			
		||||
 *
 | 
			
		||||
 *  Template parameters:
 | 
			
		||||
 *   common_header: Rendered contents of common.cl
 | 
			
		||||
 *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
 | 
			
		||||
 *   pml_thickness: Number of cells (integer)
 | 
			
		||||
 *
 | 
			
		||||
 *  OpenCL args:
 | 
			
		||||
 *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
 | 
			
		||||
#define read_field(E, x, y, z) read_imagef(E, sampler, (int4)(z, y, x, 0))
 | 
			
		||||
#define write_field(E, x, y, z, E__) write_imagef(E, (int4)(z, y, x, 0), E__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
const sampler_t sampler = CLK_FILTER_NEAREST | \
 | 
			
		||||
                          CLK_NORMALIZED_COORDS_FALSE | \
 | 
			
		||||
                          CLK_ADDRESS_NONE;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
__kernel void update_e(
 | 
			
		||||
        __global float* E,
 | 
			
		||||
        __read_only image3d_t H,
 | 
			
		||||
        const float dt,
 | 
			
		||||
        __read_only image3d_t eps,
 | 
			
		||||
        const size_t n
 | 
			
		||||
        ) {
 | 
			
		||||
 | 
			
		||||
    size_t lid = get_local_id(0);
 | 
			
		||||
    size_t gsize = get_global_size(0);
 | 
			
		||||
    size_t work_group_start = get_local_size(0) * get_group_id(0);
 | 
			
		||||
    size_t i;
 | 
			
		||||
 | 
			
		||||
    for (i = work_group_start + lid; i < n; i += gsize) {
 | 
			
		||||
 | 
			
		||||
        {{common_header | indent(8, False)}}
 | 
			
		||||
 | 
			
		||||
        ////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
        float4 eps__ = read_field(eps, x, y, z);
 | 
			
		||||
        float4 H__ = read_field(H, x, y, z);
 | 
			
		||||
        float4 Hmx = read_field(H, x + mx, y, z);
 | 
			
		||||
        float4 Hmy = read_field(H, x, y + my, z);
 | 
			
		||||
        float4 Hmz = read_field(H, x, y, z + mz);
 | 
			
		||||
 | 
			
		||||
        {% if pmls -%}
 | 
			
		||||
        const int pml_thickness = {{pml_thickness}};
 | 
			
		||||
        {%- endif %}
 | 
			
		||||
 | 
			
		||||
        /*
 | 
			
		||||
         *   Precalclate derivatives
 | 
			
		||||
         */
 | 
			
		||||
        float dHxy = H__.x - Hmy.x;
 | 
			
		||||
        float dHxz = H__.x - Hmz.x;
 | 
			
		||||
 | 
			
		||||
        float dHyx = H__.y - Hmx.y;
 | 
			
		||||
        float dHyz = H__.y - Hmz.y;
 | 
			
		||||
 | 
			
		||||
        float dHzx = H__.z - Hmx.z;
 | 
			
		||||
        float dHzy = H__.z - Hmy.z;
 | 
			
		||||
 | 
			
		||||
        /*
 | 
			
		||||
         *   PML Update
 | 
			
		||||
         */
 | 
			
		||||
        // PML effects on E
 | 
			
		||||
        float pExi = 0;
 | 
			
		||||
        float pEyi = 0;
 | 
			
		||||
        float pEzi = 0;
 | 
			
		||||
 | 
			
		||||
        {% for r, p in pmls -%}
 | 
			
		||||
            {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
 | 
			
		||||
            {%- set psi = 'Psi_' ~ r ~ p ~ '_E' -%}
 | 
			
		||||
            {%- if r != 'y' -%}
 | 
			
		||||
                {%- set se, sh = '-', '+' -%}
 | 
			
		||||
            {%- else -%}
 | 
			
		||||
                {%- set se, sh = '+', '-' -%}
 | 
			
		||||
            {%- endif -%}
 | 
			
		||||
 | 
			
		||||
            {%- if p == 'n' %}
 | 
			
		||||
 | 
			
		||||
        if ( {{r}} < pml_thickness ) {
 | 
			
		||||
            const size_t ir = {{r}};                          // index into pml parameters
 | 
			
		||||
 | 
			
		||||
            {%- elif p == 'p' %}
 | 
			
		||||
 | 
			
		||||
        if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
 | 
			
		||||
            const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
 | 
			
		||||
 | 
			
		||||
            {%- endif %}
 | 
			
		||||
            const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
 | 
			
		||||
            {{psi ~ u}}[ip] = p0e{{p}}[ir] * {{psi ~ u}}[ip] + p1e{{p}}[ir] * dH{{v ~ r}};
 | 
			
		||||
            {{psi ~ v}}[ip] = p0e{{p}}[ir] * {{psi ~ v}}[ip] + p1e{{p}}[ir] * dH{{u ~ r}};
 | 
			
		||||
            pE{{u}}i {{se}}= {{psi ~ u}}[ip];
 | 
			
		||||
            pE{{v}}i {{sh}}= {{psi ~ v}}[ip];
 | 
			
		||||
        }
 | 
			
		||||
        {%- endfor %}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        /*
 | 
			
		||||
         *  Update E
 | 
			
		||||
         */
 | 
			
		||||
        float4 E__ = vload4(i, E);
 | 
			
		||||
        E__.x += dt / eps__.x * (dHzy - dHyz + pExi);
 | 
			
		||||
        E__.y += dt / eps__.y * (dHxz - dHzx + pEyi);
 | 
			
		||||
        E__.z += dt / eps__.z * (dHyx - dHxy + pEzi);
 | 
			
		||||
        vstore4(E__, i, E);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										139
									
								
								fdtd/kernels/update_h_full.cl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								fdtd/kernels/update_h_full.cl
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,139 @@
 | 
			
		||||
//CL//
 | 
			
		||||
/*
 | 
			
		||||
 *  Update H-field, including any PMLs.
 | 
			
		||||
 *   Also precalculate values for poynting vector if necessary.
 | 
			
		||||
 *
 | 
			
		||||
 *  Template parameters:
 | 
			
		||||
 *   common_header: Rendered contents of common.cl
 | 
			
		||||
 *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
 | 
			
		||||
 *   pml_thickness: Number of cells (integer)
 | 
			
		||||
 *   do_poynting: Whether to precalculate poynting vector components (boolean)
 | 
			
		||||
 *
 | 
			
		||||
 *  OpenCL args:
 | 
			
		||||
 *   E, H, dt, [p{01}h{np}, Psi_{xyz}{np}_H], [oS]
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
 | 
			
		||||
#define read_field(E, x, y, z) read_imagef(E, sampler, (int4)(z, y, x, 0))
 | 
			
		||||
#define write_field(E, x, y, z, E__) write_imagef(E, (int4)(z, y, x, 0), E__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
const sampler_t sampler = CLK_FILTER_NEAREST | \
 | 
			
		||||
                          CLK_NORMALIZED_COORDS_FALSE | \
 | 
			
		||||
                          CLK_ADDRESS_NONE;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
__kernel void update_h(
 | 
			
		||||
        __read_only image3d_t E,
 | 
			
		||||
        __global float* H,
 | 
			
		||||
        const float dt,
 | 
			
		||||
        const size_t n
 | 
			
		||||
        {% if pmls -%}
 | 
			
		||||
        __global float* S,
 | 
			
		||||
        {%- endif %}
 | 
			
		||||
        ) {
 | 
			
		||||
 | 
			
		||||
    size_t lid = get_local_id(0);
 | 
			
		||||
    size_t gsize = get_global_size(0);
 | 
			
		||||
    size_t work_group_start = get_local_size(0) * get_group_id(0);
 | 
			
		||||
    size_t i;
 | 
			
		||||
 | 
			
		||||
    for (i = work_group_start + lid; i < n; i += gsize) {
 | 
			
		||||
 | 
			
		||||
        {{common_header | indent(8, False)}}
 | 
			
		||||
 | 
			
		||||
        ////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
        float4 E__ = read_field(E, x, y, z);
 | 
			
		||||
        float4 Epx = read_field(E, x + px, y, z);
 | 
			
		||||
        float4 Epy = read_field(E, x, y + py, z);
 | 
			
		||||
        float4 Epz = read_field(E, x, y, z + pz);
 | 
			
		||||
 | 
			
		||||
        {% if pmls -%}
 | 
			
		||||
        const int pml_thickness = {{pml_thickness}};
 | 
			
		||||
        {%- endif %}
 | 
			
		||||
 | 
			
		||||
        /*
 | 
			
		||||
         *  Precalculate derivatives
 | 
			
		||||
         */
 | 
			
		||||
        float dExy = Epy.x - E__.x;
 | 
			
		||||
        float dExz = Epz.x - E__.x;
 | 
			
		||||
 | 
			
		||||
        float dEyx = Epx.y - E__.y;
 | 
			
		||||
        float dEyz = Epz.y - E__.y;
 | 
			
		||||
 | 
			
		||||
        float dEzx = Epx.z - E__.z;
 | 
			
		||||
        float dEzy = Epy.z - E__.z;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        /*
 | 
			
		||||
         *  PML Update
 | 
			
		||||
         */
 | 
			
		||||
        // PML contributions to H
 | 
			
		||||
        float pHxi = 0;
 | 
			
		||||
        float pHyi = 0;
 | 
			
		||||
        float pHzi = 0;
 | 
			
		||||
 | 
			
		||||
        {%- for r, p in pmls -%}
 | 
			
		||||
            {%- set u, v = ['x', 'y', 'z'] | reject('equalto', r) -%}
 | 
			
		||||
            {%- set psi = 'Psi_' ~ r ~ p ~ '_H' -%}
 | 
			
		||||
            {%- if r != 'y' -%}
 | 
			
		||||
                {%- set se, sh = '-', '+' -%}
 | 
			
		||||
            {%- else -%}
 | 
			
		||||
                {%- set se, sh = '+', '-' -%}
 | 
			
		||||
            {%- endif -%}
 | 
			
		||||
 | 
			
		||||
            {%- if p == 'n' %}
 | 
			
		||||
 | 
			
		||||
        if ( {{r}} < pml_thickness ) {
 | 
			
		||||
            const size_t ir = {{r}};                          // index into pml parameters
 | 
			
		||||
 | 
			
		||||
            {%- elif p == 'p' %}
 | 
			
		||||
 | 
			
		||||
        if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_thickness ) {
 | 
			
		||||
            const size_t ir = (s{{r}} - 1) - {{r}};                     // index into pml parameters
 | 
			
		||||
 | 
			
		||||
            {%- endif %}
 | 
			
		||||
            const size_t ip = {{v}} + {{u}} * s{{v}} + ir * s{{v}} * s{{u}};  // linear index into Psi
 | 
			
		||||
            {{psi ~ u}}[ip] = p0h{{p}}[ir] * {{psi ~ u}}[ip] + p1h{{p}}[ir] * dE{{v ~ r}};
 | 
			
		||||
            {{psi ~ v}}[ip] = p0h{{p}}[ir] * {{psi ~ v}}[ip] + p1h{{p}}[ir] * dE{{u ~ r}};
 | 
			
		||||
            pH{{u}}i {{sh}}= {{psi ~ u}}[ip];
 | 
			
		||||
            pH{{v}}i {{se}}= {{psi ~ v}}[ip];
 | 
			
		||||
        }
 | 
			
		||||
        {%- endfor %}
 | 
			
		||||
 | 
			
		||||
        /*
 | 
			
		||||
         *  Update H
 | 
			
		||||
         */
 | 
			
		||||
        {% if do_poynting -%}
 | 
			
		||||
        // Save old H for averaging
 | 
			
		||||
        float Hx_old = Hx[i];
 | 
			
		||||
        float Hy_old = Hy[i];
 | 
			
		||||
        float Hz_old = Hz[i];
 | 
			
		||||
        {%- endif %}
 | 
			
		||||
 | 
			
		||||
        // H update equations
 | 
			
		||||
        float4 H__ = vload4(i, H);
 | 
			
		||||
        H__.x -= dt * (dEzy - dEyz + pHxi);
 | 
			
		||||
        H__.y -= dt * (dExz - dEzx + pHyi);
 | 
			
		||||
        H__.z -= dt * (dEyx - dExy + pHzi);
 | 
			
		||||
        vstore4(H__, i, H);
 | 
			
		||||
 | 
			
		||||
        {% if do_poynting -%}
 | 
			
		||||
        // Average H across timesteps
 | 
			
		||||
        float aHxt = Hx[i] + Hx_old;
 | 
			
		||||
        float aHyt = Hy[i] + Hy_old;
 | 
			
		||||
        float aHzt = Hz[i] + Hz_old;
 | 
			
		||||
 | 
			
		||||
        /*
 | 
			
		||||
         *  Calculate unscaled S components at H locations
 | 
			
		||||
         */
 | 
			
		||||
        float4 S__;
 | 
			
		||||
        S__.x = Epx.y * aHzt - Epx.z * aHyt;
 | 
			
		||||
        S__.y = Epy.z * aHxt - Epy.x * aHzt;
 | 
			
		||||
        S__.z = Epz.x * aHyt - Epz.y * aHxt;
 | 
			
		||||
 | 
			
		||||
        write_field(S, x, y, z, S__);
 | 
			
		||||
        {%- endif -%}
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										0
									
								
								opencl_fdtd/version.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								opencl_fdtd/version.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										
											BIN
										
									
								
								saved_simulation
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								saved_simulation
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										565
									
								
								sources.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										565
									
								
								sources.c
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,565 @@
 | 
			
		||||
/*
 | 
			
		||||
 *  Update E-field, including any PMLs.
 | 
			
		||||
 *
 | 
			
		||||
 *  Template parameters:
 | 
			
		||||
 *   common_header: Rendered contents of common.cl
 | 
			
		||||
 *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
 | 
			
		||||
        axes, polarities, and thicknesses.
 | 
			
		||||
 *
 | 
			
		||||
 *  OpenCL args:
 | 
			
		||||
 *   E, H, dt, eps, [p{01}e{np}, Psi_{xyz}{np}_E]
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef float ftype;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Field size info
 | 
			
		||||
 */
 | 
			
		||||
const size_t sx = 395;
 | 
			
		||||
const size_t sy = 345;
 | 
			
		||||
const size_t sz = 73;
 | 
			
		||||
const size_t field_size = sx * sy * sz;
 | 
			
		||||
 | 
			
		||||
//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
 | 
			
		||||
// i is outside the bounds of Ex[].
 | 
			
		||||
if (i >= field_size) {
 | 
			
		||||
    PYOPENCL_ELWISE_CONTINUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Array indexing
 | 
			
		||||
 */
 | 
			
		||||
// Given a linear index i and shape (sx, sy, sz), defines x, y, and z
 | 
			
		||||
//  as the 3D indices of the current element (i).
 | 
			
		||||
// (ie, converts linear index [i] to field indices (x, y, z)
 | 
			
		||||
const size_t x = i / (sz * sy);
 | 
			
		||||
const size_t y = (i - x * sz * sy) / sz;
 | 
			
		||||
const size_t z = (i - y * sz - x * sz * sy);
 | 
			
		||||
 | 
			
		||||
// Calculate linear index offsets corresponding to offsets in 3D
 | 
			
		||||
// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z)
 | 
			
		||||
const size_t dix = sz * sy;
 | 
			
		||||
const size_t diy = sz;
 | 
			
		||||
const size_t diz = 1;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Pointer math
 | 
			
		||||
 */
 | 
			
		||||
//Pointer offsets into the components of a linearized vector-field
 | 
			
		||||
// (eg. Hx = H + XX, where H and Hx are pointers)
 | 
			
		||||
const size_t XX = 0;
 | 
			
		||||
const size_t YY = field_size;
 | 
			
		||||
const size_t ZZ = field_size * 2;
 | 
			
		||||
 | 
			
		||||
//Define pointers to vector components of each field (eg. Hx = H + XX)
 | 
			
		||||
__global ftype *Ex = E + XX;
 | 
			
		||||
__global ftype *Ey = E + YY;
 | 
			
		||||
__global ftype *Ez = E + ZZ;
 | 
			
		||||
 | 
			
		||||
__global ftype *Hx = H + XX;
 | 
			
		||||
__global ftype *Hy = H + YY;
 | 
			
		||||
__global ftype *Hz = H + ZZ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Implement periodic boundary conditions
 | 
			
		||||
 *
 | 
			
		||||
 * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction.
 | 
			
		||||
 * In the event that we start at x == 0, we actually want to wrap around and grab the cell
 | 
			
		||||
 * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix .
 | 
			
		||||
 *
 | 
			
		||||
 * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction.
 | 
			
		||||
 * In the event that we start at x == (sx - 1), we actually want to wrap around and grab
 | 
			
		||||
 * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
int mx = -dix;
 | 
			
		||||
int px = +dix;
 | 
			
		||||
int wrap_x = (sx - 1) * dix;
 | 
			
		||||
if ( x == 0 ) {
 | 
			
		||||
  mx = wrap_x;
 | 
			
		||||
} else if ( x == sx - 1 ) {
 | 
			
		||||
  px = -wrap_x;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
int my = -diy;
 | 
			
		||||
int py = +diy;
 | 
			
		||||
int wrap_y = (sy - 1) * diy;
 | 
			
		||||
if ( y == 0 ) {
 | 
			
		||||
  my = wrap_y;
 | 
			
		||||
} else if ( y == sy - 1 ) {
 | 
			
		||||
  py = -wrap_y;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
int mz = -diz;
 | 
			
		||||
int pz = +diz;
 | 
			
		||||
int wrap_z = (sz - 1) * diz;
 | 
			
		||||
if ( z == 0 ) {
 | 
			
		||||
  mz = wrap_z;
 | 
			
		||||
} else if ( z == sz - 1 ) {
 | 
			
		||||
  pz = -wrap_z;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
__global ftype *epsx = eps + XX;
 | 
			
		||||
__global ftype *epsy = eps + YY;
 | 
			
		||||
__global ftype *epsz = eps + ZZ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *   Precalclate derivatives
 | 
			
		||||
 */
 | 
			
		||||
ftype dHxy = Hx[i] - Hx[i + my];
 | 
			
		||||
ftype dHxz = Hx[i] - Hx[i + mz];
 | 
			
		||||
 | 
			
		||||
ftype dHyx = Hy[i] - Hy[i + mx];
 | 
			
		||||
ftype dHyz = Hy[i] - Hy[i + mz];
 | 
			
		||||
 | 
			
		||||
ftype dHzx = Hz[i] - Hz[i + mx];
 | 
			
		||||
ftype dHzy = Hz[i] - Hz[i + my];
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *   PML Update
 | 
			
		||||
 */
 | 
			
		||||
// PML effects on E
 | 
			
		||||
ftype pExi = 0;
 | 
			
		||||
ftype pEyi = 0;
 | 
			
		||||
ftype pEzi = 0;
 | 
			
		||||
 | 
			
		||||
int pml_xn_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( x < pml_xn_thickness ) {
 | 
			
		||||
    const size_t ir = x;                          // index into pml parameters
 | 
			
		||||
    const size_t ip = z + y * sz + ir * sz * sy;  // linear index into Psi
 | 
			
		||||
    Psi_xn_Ey[ip] = px0en[ir] * Psi_xn_Ey[ip] + px1en[ir] * dHzx;
 | 
			
		||||
    Psi_xn_Ez[ip] = px0en[ir] * Psi_xn_Ez[ip] + px1en[ir] * dHyx;
 | 
			
		||||
    pEyi -= Psi_xn_Ey[ip];
 | 
			
		||||
    pEzi += Psi_xn_Ez[ip];
 | 
			
		||||
}int pml_xp_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( sx > x && x >= sx - pml_xp_thickness ) {
 | 
			
		||||
    const size_t ir = (sx - 1) - x;                     // index into pml parameters
 | 
			
		||||
    const size_t ip = z + y * sz + ir * sz * sy;  // linear index into Psi
 | 
			
		||||
    Psi_xp_Ey[ip] = px0ep[ir] * Psi_xp_Ey[ip] + px1ep[ir] * dHzx;
 | 
			
		||||
    Psi_xp_Ez[ip] = px0ep[ir] * Psi_xp_Ez[ip] + px1ep[ir] * dHyx;
 | 
			
		||||
    pEyi -= Psi_xp_Ey[ip];
 | 
			
		||||
    pEzi += Psi_xp_Ez[ip];
 | 
			
		||||
}int pml_yn_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( y < pml_yn_thickness ) {
 | 
			
		||||
    const size_t ir = y;                          // index into pml parameters
 | 
			
		||||
    const size_t ip = z + x * sz + ir * sz * sx;  // linear index into Psi
 | 
			
		||||
    Psi_yn_Ex[ip] = py0en[ir] * Psi_yn_Ex[ip] + py1en[ir] * dHzy;
 | 
			
		||||
    Psi_yn_Ez[ip] = py0en[ir] * Psi_yn_Ez[ip] + py1en[ir] * dHxy;
 | 
			
		||||
    pExi += Psi_yn_Ex[ip];
 | 
			
		||||
    pEzi -= Psi_yn_Ez[ip];
 | 
			
		||||
}int pml_yp_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( sy > y && y >= sy - pml_yp_thickness ) {
 | 
			
		||||
    const size_t ir = (sy - 1) - y;                     // index into pml parameters
 | 
			
		||||
    const size_t ip = z + x * sz + ir * sz * sx;  // linear index into Psi
 | 
			
		||||
    Psi_yp_Ex[ip] = py0ep[ir] * Psi_yp_Ex[ip] + py1ep[ir] * dHzy;
 | 
			
		||||
    Psi_yp_Ez[ip] = py0ep[ir] * Psi_yp_Ez[ip] + py1ep[ir] * dHxy;
 | 
			
		||||
    pExi += Psi_yp_Ex[ip];
 | 
			
		||||
    pEzi -= Psi_yp_Ez[ip];
 | 
			
		||||
}int pml_zn_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( z < pml_zn_thickness ) {
 | 
			
		||||
    const size_t ir = z;                          // index into pml parameters
 | 
			
		||||
    const size_t ip = y + x * sy + ir * sy * sx;  // linear index into Psi
 | 
			
		||||
    Psi_zn_Ex[ip] = pz0en[ir] * Psi_zn_Ex[ip] + pz1en[ir] * dHyz;
 | 
			
		||||
    Psi_zn_Ey[ip] = pz0en[ir] * Psi_zn_Ey[ip] + pz1en[ir] * dHxz;
 | 
			
		||||
    pExi -= Psi_zn_Ex[ip];
 | 
			
		||||
    pEyi += Psi_zn_Ey[ip];
 | 
			
		||||
}int pml_zp_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( sz > z && z >= sz - pml_zp_thickness ) {
 | 
			
		||||
    const size_t ir = (sz - 1) - z;                     // index into pml parameters
 | 
			
		||||
    const size_t ip = y + x * sy + ir * sy * sx;  // linear index into Psi
 | 
			
		||||
    Psi_zp_Ex[ip] = pz0ep[ir] * Psi_zp_Ex[ip] + pz1ep[ir] * dHyz;
 | 
			
		||||
    Psi_zp_Ey[ip] = pz0ep[ir] * Psi_zp_Ey[ip] + pz1ep[ir] * dHxz;
 | 
			
		||||
    pExi -= Psi_zp_Ex[ip];
 | 
			
		||||
    pEyi += Psi_zp_Ey[ip];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *  Update E
 | 
			
		||||
 */
 | 
			
		||||
Ex[i] += dt / epsx[i] * (dHzy - dHyz + pExi);
 | 
			
		||||
Ey[i] += dt / epsy[i] * (dHxz - dHzx + pEyi);
 | 
			
		||||
Ez[i] += dt / epsz[i] * (dHyx - dHxy + pEzi);
 | 
			
		||||
==========================================
 | 
			
		||||
/*
 | 
			
		||||
 *  Update H-field, including any PMLs.
 | 
			
		||||
 *   Also precalculate values for poynting vector if necessary.
 | 
			
		||||
 *
 | 
			
		||||
 *  Template parameters:
 | 
			
		||||
 *   common_header: Rendered contents of common.cl
 | 
			
		||||
 *   pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing
 | 
			
		||||
        axes, polarities, and thicknesses.
 | 
			
		||||
 *   do_poynting: Whether to precalculate poynting vector components (boolean)
 | 
			
		||||
 *
 | 
			
		||||
 *  OpenCL args:
 | 
			
		||||
 *   E, H, dt, [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS]
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef float ftype;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Field size info
 | 
			
		||||
 */
 | 
			
		||||
const size_t sx = 395;
 | 
			
		||||
const size_t sy = 345;
 | 
			
		||||
const size_t sz = 73;
 | 
			
		||||
const size_t field_size = sx * sy * sz;
 | 
			
		||||
 | 
			
		||||
//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
 | 
			
		||||
// i is outside the bounds of Ex[].
 | 
			
		||||
if (i >= field_size) {
 | 
			
		||||
    PYOPENCL_ELWISE_CONTINUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Array indexing
 | 
			
		||||
 */
 | 
			
		||||
// Given a linear index i and shape (sx, sy, sz), defines x, y, and z
 | 
			
		||||
//  as the 3D indices of the current element (i).
 | 
			
		||||
// (ie, converts linear index [i] to field indices (x, y, z)
 | 
			
		||||
const size_t x = i / (sz * sy);
 | 
			
		||||
const size_t y = (i - x * sz * sy) / sz;
 | 
			
		||||
const size_t z = (i - y * sz - x * sz * sy);
 | 
			
		||||
 | 
			
		||||
// Calculate linear index offsets corresponding to offsets in 3D
 | 
			
		||||
// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z)
 | 
			
		||||
const size_t dix = sz * sy;
 | 
			
		||||
const size_t diy = sz;
 | 
			
		||||
const size_t diz = 1;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Pointer math
 | 
			
		||||
 */
 | 
			
		||||
//Pointer offsets into the components of a linearized vector-field
 | 
			
		||||
// (eg. Hx = H + XX, where H and Hx are pointers)
 | 
			
		||||
const size_t XX = 0;
 | 
			
		||||
const size_t YY = field_size;
 | 
			
		||||
const size_t ZZ = field_size * 2;
 | 
			
		||||
 | 
			
		||||
//Define pointers to vector components of each field (eg. Hx = H + XX)
 | 
			
		||||
__global ftype *Ex = E + XX;
 | 
			
		||||
__global ftype *Ey = E + YY;
 | 
			
		||||
__global ftype *Ez = E + ZZ;
 | 
			
		||||
 | 
			
		||||
__global ftype *Hx = H + XX;
 | 
			
		||||
__global ftype *Hy = H + YY;
 | 
			
		||||
__global ftype *Hz = H + ZZ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Implement periodic boundary conditions
 | 
			
		||||
 *
 | 
			
		||||
 * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction.
 | 
			
		||||
 * In the event that we start at x == 0, we actually want to wrap around and grab the cell
 | 
			
		||||
 * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix .
 | 
			
		||||
 *
 | 
			
		||||
 * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction.
 | 
			
		||||
 * In the event that we start at x == (sx - 1), we actually want to wrap around and grab
 | 
			
		||||
 * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
int mx = -dix;
 | 
			
		||||
int px = +dix;
 | 
			
		||||
int wrap_x = (sx - 1) * dix;
 | 
			
		||||
if ( x == 0 ) {
 | 
			
		||||
  mx = wrap_x;
 | 
			
		||||
} else if ( x == sx - 1 ) {
 | 
			
		||||
  px = -wrap_x;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
int my = -diy;
 | 
			
		||||
int py = +diy;
 | 
			
		||||
int wrap_y = (sy - 1) * diy;
 | 
			
		||||
if ( y == 0 ) {
 | 
			
		||||
  my = wrap_y;
 | 
			
		||||
} else if ( y == sy - 1 ) {
 | 
			
		||||
  py = -wrap_y;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
int mz = -diz;
 | 
			
		||||
int pz = +diz;
 | 
			
		||||
int wrap_z = (sz - 1) * diz;
 | 
			
		||||
if ( z == 0 ) {
 | 
			
		||||
  mz = wrap_z;
 | 
			
		||||
} else if ( z == sz - 1 ) {
 | 
			
		||||
  pz = -wrap_z;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *  Precalculate derivatives
 | 
			
		||||
 */
 | 
			
		||||
ftype dExy = Ex[i + py] - Ex[i];
 | 
			
		||||
ftype dExz = Ex[i + pz] - Ex[i];
 | 
			
		||||
 | 
			
		||||
ftype dEyx = Ey[i + px] - Ey[i];
 | 
			
		||||
ftype dEyz = Ey[i + pz] - Ey[i];
 | 
			
		||||
 | 
			
		||||
ftype dEzx = Ez[i + px] - Ez[i];
 | 
			
		||||
ftype dEzy = Ez[i + py] - Ez[i];
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *  Precalculate averaged E
 | 
			
		||||
 */
 | 
			
		||||
ftype aExy = Ex[i + py] + Ex[i];
 | 
			
		||||
ftype aExz = Ex[i + pz] + Ex[i];
 | 
			
		||||
 | 
			
		||||
ftype aEyx = Ey[i + px] + Ey[i];
 | 
			
		||||
ftype aEyz = Ey[i + pz] + Ey[i];
 | 
			
		||||
 | 
			
		||||
ftype aEzx = Ez[i + px] + Ez[i];
 | 
			
		||||
ftype aEzy = Ez[i + py] + Ez[i];
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *  PML Update
 | 
			
		||||
 */
 | 
			
		||||
// PML contributions to H
 | 
			
		||||
ftype pHxi = 0;
 | 
			
		||||
ftype pHyi = 0;
 | 
			
		||||
ftype pHzi = 0;
 | 
			
		||||
 | 
			
		||||
int pml_xn_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( x < pml_xn_thickness ) {
 | 
			
		||||
    const size_t ir = x;                          // index into pml parameters
 | 
			
		||||
    const size_t ip = z + y * sz + ir * sz * sy;  // linear index into Psi
 | 
			
		||||
    Psi_xn_Hy[ip] = px0hn[ir] * Psi_xn_Hy[ip] + px1hn[ir] * dEzx;
 | 
			
		||||
    Psi_xn_Hz[ip] = px0hn[ir] * Psi_xn_Hz[ip] + px1hn[ir] * dEyx;
 | 
			
		||||
    pHyi += Psi_xn_Hy[ip];
 | 
			
		||||
    pHzi -= Psi_xn_Hz[ip];
 | 
			
		||||
}int pml_xp_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( sx > x && x >= sx - pml_xp_thickness ) {
 | 
			
		||||
    const size_t ir = (sx - 1) - x;                     // index into pml parameters
 | 
			
		||||
    const size_t ip = z + y * sz + ir * sz * sy;  // linear index into Psi
 | 
			
		||||
    Psi_xp_Hy[ip] = px0hp[ir] * Psi_xp_Hy[ip] + px1hp[ir] * dEzx;
 | 
			
		||||
    Psi_xp_Hz[ip] = px0hp[ir] * Psi_xp_Hz[ip] + px1hp[ir] * dEyx;
 | 
			
		||||
    pHyi += Psi_xp_Hy[ip];
 | 
			
		||||
    pHzi -= Psi_xp_Hz[ip];
 | 
			
		||||
}int pml_yn_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( y < pml_yn_thickness ) {
 | 
			
		||||
    const size_t ir = y;                          // index into pml parameters
 | 
			
		||||
    const size_t ip = z + x * sz + ir * sz * sx;  // linear index into Psi
 | 
			
		||||
    Psi_yn_Hx[ip] = py0hn[ir] * Psi_yn_Hx[ip] + py1hn[ir] * dEzy;
 | 
			
		||||
    Psi_yn_Hz[ip] = py0hn[ir] * Psi_yn_Hz[ip] + py1hn[ir] * dExy;
 | 
			
		||||
    pHxi -= Psi_yn_Hx[ip];
 | 
			
		||||
    pHzi += Psi_yn_Hz[ip];
 | 
			
		||||
}int pml_yp_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( sy > y && y >= sy - pml_yp_thickness ) {
 | 
			
		||||
    const size_t ir = (sy - 1) - y;                     // index into pml parameters
 | 
			
		||||
    const size_t ip = z + x * sz + ir * sz * sx;  // linear index into Psi
 | 
			
		||||
    Psi_yp_Hx[ip] = py0hp[ir] * Psi_yp_Hx[ip] + py1hp[ir] * dEzy;
 | 
			
		||||
    Psi_yp_Hz[ip] = py0hp[ir] * Psi_yp_Hz[ip] + py1hp[ir] * dExy;
 | 
			
		||||
    pHxi -= Psi_yp_Hx[ip];
 | 
			
		||||
    pHzi += Psi_yp_Hz[ip];
 | 
			
		||||
}int pml_zn_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( z < pml_zn_thickness ) {
 | 
			
		||||
    const size_t ir = z;                          // index into pml parameters
 | 
			
		||||
    const size_t ip = y + x * sy + ir * sy * sx;  // linear index into Psi
 | 
			
		||||
    Psi_zn_Hx[ip] = pz0hn[ir] * Psi_zn_Hx[ip] + pz1hn[ir] * dEyz;
 | 
			
		||||
    Psi_zn_Hy[ip] = pz0hn[ir] * Psi_zn_Hy[ip] + pz1hn[ir] * dExz;
 | 
			
		||||
    pHxi += Psi_zn_Hx[ip];
 | 
			
		||||
    pHyi -= Psi_zn_Hy[ip];
 | 
			
		||||
}int pml_zp_thickness = 8;
 | 
			
		||||
 | 
			
		||||
if ( sz > z && z >= sz - pml_zp_thickness ) {
 | 
			
		||||
    const size_t ir = (sz - 1) - z;                     // index into pml parameters
 | 
			
		||||
    const size_t ip = y + x * sy + ir * sy * sx;  // linear index into Psi
 | 
			
		||||
    Psi_zp_Hx[ip] = pz0hp[ir] * Psi_zp_Hx[ip] + pz1hp[ir] * dEyz;
 | 
			
		||||
    Psi_zp_Hy[ip] = pz0hp[ir] * Psi_zp_Hy[ip] + pz1hp[ir] * dExz;
 | 
			
		||||
    pHxi += Psi_zp_Hx[ip];
 | 
			
		||||
    pHyi -= Psi_zp_Hy[ip];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *  Update H
 | 
			
		||||
 */
 | 
			
		||||
// Save old H for averaging
 | 
			
		||||
ftype Hx_old = Hx[i];
 | 
			
		||||
ftype Hy_old = Hy[i];
 | 
			
		||||
ftype Hz_old = Hz[i];
 | 
			
		||||
 | 
			
		||||
// H update equations
 | 
			
		||||
Hx[i] -= dt * (dEzy - dEyz + pHxi);
 | 
			
		||||
Hy[i] -= dt * (dExz - dEzx + pHyi);
 | 
			
		||||
Hz[i] -= dt * (dEyx - dExy + pHzi);
 | 
			
		||||
 | 
			
		||||
// Average H across timesteps
 | 
			
		||||
ftype aHxt = Hx[i] + Hx_old;
 | 
			
		||||
ftype aHyt = Hy[i] + Hy_old;
 | 
			
		||||
ftype aHzt = Hz[i] + Hz_old;
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 *  Calculate unscaled S components at H locations
 | 
			
		||||
 */
 | 
			
		||||
__global ftype *oSxy = oS + 0 * field_size;
 | 
			
		||||
__global ftype *oSyz = oS + 1 * field_size;
 | 
			
		||||
__global ftype *oSzx = oS + 2 * field_size;
 | 
			
		||||
__global ftype *oSxz = oS + 3 * field_size;
 | 
			
		||||
__global ftype *oSyx = oS + 4 * field_size;
 | 
			
		||||
__global ftype *oSzy = oS + 5 * field_size;
 | 
			
		||||
 | 
			
		||||
oSxy[i] = aEyx * aHzt;
 | 
			
		||||
oSxz[i] = -aEzx * aHyt;
 | 
			
		||||
oSyz[i] = aEzy * aHxt;
 | 
			
		||||
oSyx[i] = -aExy * aHzt;
 | 
			
		||||
oSzx[i] = aExz * aHyt;
 | 
			
		||||
oSzy[i] = -aEyz * aHxt;
 | 
			
		||||
==========================================
 | 
			
		||||
/*
 | 
			
		||||
 *  Update E-field, including any PMLs.
 | 
			
		||||
 *
 | 
			
		||||
 *  Template parameters:
 | 
			
		||||
 *   common_header: Rendered contents of common.cl
 | 
			
		||||
 *   pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities
 | 
			
		||||
 *   pml_thickness: Number of cells (integer)
 | 
			
		||||
 *
 | 
			
		||||
 *  OpenCL args:
 | 
			
		||||
 *   E, H, dt, S, oS
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef float ftype;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Field size info
 | 
			
		||||
 */
 | 
			
		||||
const size_t sx = 395;
 | 
			
		||||
const size_t sy = 345;
 | 
			
		||||
const size_t sz = 73;
 | 
			
		||||
const size_t field_size = sx * sy * sz;
 | 
			
		||||
 | 
			
		||||
//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
 | 
			
		||||
// i is outside the bounds of Ex[].
 | 
			
		||||
if (i >= field_size) {
 | 
			
		||||
    PYOPENCL_ELWISE_CONTINUE;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Array indexing
 | 
			
		||||
 */
 | 
			
		||||
// Given a linear index i and shape (sx, sy, sz), defines x, y, and z
 | 
			
		||||
//  as the 3D indices of the current element (i).
 | 
			
		||||
// (ie, converts linear index [i] to field indices (x, y, z)
 | 
			
		||||
const size_t x = i / (sz * sy);
 | 
			
		||||
const size_t y = (i - x * sz * sy) / sz;
 | 
			
		||||
const size_t z = (i - y * sz - x * sz * sy);
 | 
			
		||||
 | 
			
		||||
// Calculate linear index offsets corresponding to offsets in 3D
 | 
			
		||||
// (ie, if E[i] <-> E(x, y, z), then E[i + diy] <-> E(x, y + 1, z)
 | 
			
		||||
const size_t dix = sz * sy;
 | 
			
		||||
const size_t diy = sz;
 | 
			
		||||
const size_t diz = 1;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Pointer math
 | 
			
		||||
 */
 | 
			
		||||
//Pointer offsets into the components of a linearized vector-field
 | 
			
		||||
// (eg. Hx = H + XX, where H and Hx are pointers)
 | 
			
		||||
const size_t XX = 0;
 | 
			
		||||
const size_t YY = field_size;
 | 
			
		||||
const size_t ZZ = field_size * 2;
 | 
			
		||||
 | 
			
		||||
//Define pointers to vector components of each field (eg. Hx = H + XX)
 | 
			
		||||
__global ftype *Ex = E + XX;
 | 
			
		||||
__global ftype *Ey = E + YY;
 | 
			
		||||
__global ftype *Ez = E + ZZ;
 | 
			
		||||
 | 
			
		||||
__global ftype *Hx = H + XX;
 | 
			
		||||
__global ftype *Hy = H + YY;
 | 
			
		||||
__global ftype *Hz = H + ZZ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Implement periodic boundary conditions
 | 
			
		||||
 *
 | 
			
		||||
 * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction.
 | 
			
		||||
 * In the event that we start at x == 0, we actually want to wrap around and grab the cell
 | 
			
		||||
 * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix .
 | 
			
		||||
 *
 | 
			
		||||
 * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction.
 | 
			
		||||
 * In the event that we start at x == (sx - 1), we actually want to wrap around and grab
 | 
			
		||||
 * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
int mx = -dix;
 | 
			
		||||
int px = +dix;
 | 
			
		||||
int wrap_x = (sx - 1) * dix;
 | 
			
		||||
if ( x == 0 ) {
 | 
			
		||||
  mx = wrap_x;
 | 
			
		||||
} else if ( x == sx - 1 ) {
 | 
			
		||||
  px = -wrap_x;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
int my = -diy;
 | 
			
		||||
int py = +diy;
 | 
			
		||||
int wrap_y = (sy - 1) * diy;
 | 
			
		||||
if ( y == 0 ) {
 | 
			
		||||
  my = wrap_y;
 | 
			
		||||
} else if ( y == sy - 1 ) {
 | 
			
		||||
  py = -wrap_y;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
int mz = -diz;
 | 
			
		||||
int pz = +diz;
 | 
			
		||||
int wrap_z = (sz - 1) * diz;
 | 
			
		||||
if ( z == 0 ) {
 | 
			
		||||
  mz = wrap_z;
 | 
			
		||||
} else if ( z == sz - 1 ) {
 | 
			
		||||
  pz = -wrap_z;
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Calculate S from oS (pre-calculated components)
 | 
			
		||||
 */
 | 
			
		||||
__global ftype *Sx = S + XX;
 | 
			
		||||
__global ftype *Sy = S + YY;
 | 
			
		||||
__global ftype *Sz = S + ZZ;
 | 
			
		||||
 | 
			
		||||
// Use unscaled S components from H locations
 | 
			
		||||
__global ftype *oSxy = oS + 0 * field_size;
 | 
			
		||||
__global ftype *oSyz = oS + 1 * field_size;
 | 
			
		||||
__global ftype *oSzx = oS + 2 * field_size;
 | 
			
		||||
__global ftype *oSxz = oS + 3 * field_size;
 | 
			
		||||
__global ftype *oSyx = oS + 4 * field_size;
 | 
			
		||||
__global ftype *oSzy = oS + 5 * field_size;
 | 
			
		||||
 | 
			
		||||
ftype s_factor = dt * 0.125;
 | 
			
		||||
Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor;
 | 
			
		||||
Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor;
 | 
			
		||||
Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor;
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user