forked from jan/opencl_fdfd
		
	consolidate boundary conditions in common.cl; add some comments and minor cleanup
This commit is contained in:
		
							parent
							
								
									f364fbc8b6
								
							
						
					
					
						commit
						3ad0dd3c50
					
				@ -4,17 +4,25 @@
 | 
				
			|||||||
 *  shape       list of 3 ints specifying shape of fields
 | 
					 *  shape       list of 3 ints specifying shape of fields
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Field size info
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
// Field sizes
 | 
					// Field sizes
 | 
				
			||||||
const int sx = {{shape[0]}};
 | 
					const int sx = {{shape[0]}};
 | 
				
			||||||
const int sy = {{shape[1]}};
 | 
					const int sy = {{shape[1]}};
 | 
				
			||||||
const int sz = {{shape[2]}};
 | 
					const int sz = {{shape[2]}};
 | 
				
			||||||
 | 
					const size_t field_size = sx * sy * sz;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
 | 
					//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
 | 
				
			||||||
// i is outside the bounds of Ex[].
 | 
					// i is outside the bounds of Ex[].
 | 
				
			||||||
if (i >= sx * sy * sz) {
 | 
					if (i >= field_size) {
 | 
				
			||||||
    PYOPENCL_ELWISE_CONTINUE;
 | 
					    PYOPENCL_ELWISE_CONTINUE;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Array indexing
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
// Given a linear index i and shape (sx, sy, sz), defines x, y, and z
 | 
					// Given a linear index i and shape (sx, sy, sz), defines x, y, and z
 | 
				
			||||||
//  as the 3D indices of the current element (i).
 | 
					//  as the 3D indices of the current element (i).
 | 
				
			||||||
// (ie, converts linear index [i] to field indices (x, y, z)
 | 
					// (ie, converts linear index [i] to field indices (x, y, z)
 | 
				
			||||||
@ -28,11 +36,15 @@ const int dix = sz * sy;
 | 
				
			|||||||
const int diy = sz;
 | 
					const int diy = sz;
 | 
				
			||||||
const int diz = 1;
 | 
					const int diz = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Pointer math
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
//Pointer offsets into the components of a linearized vector-field
 | 
					//Pointer offsets into the components of a linearized vector-field
 | 
				
			||||||
// (eg. Hx = H + XX, where H and Hx are pointers)
 | 
					// (eg. Hx = H + XX, where H and Hx are pointers)
 | 
				
			||||||
const int XX = 0;
 | 
					const int XX = 0;
 | 
				
			||||||
const int YY = sx * sy * sz;
 | 
					const int YY = field_size;
 | 
				
			||||||
const int ZZ = sx * sy * sz * 2;
 | 
					const int ZZ = field_size * 2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
//Define pointers to vector components of each field (eg. Hx = H + XX)
 | 
					//Define pointers to vector components of each field (eg. Hx = H + XX)
 | 
				
			||||||
__global ctype *Ex = E + XX;
 | 
					__global ctype *Ex = E + XX;
 | 
				
			||||||
@ -42,3 +54,26 @@ __global ctype *Ez = E + ZZ;
 | 
				
			|||||||
__global ctype *Hx = H + XX;
 | 
					__global ctype *Hx = H + XX;
 | 
				
			||||||
__global ctype *Hy = H + YY;
 | 
					__global ctype *Hy = H + YY;
 | 
				
			||||||
__global ctype *Hz = H + ZZ;
 | 
					__global ctype *Hz = H + ZZ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Implement periodic boundary conditions
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * mx ([m]inus [x]) gives the index offset of the adjacent cell in the minus-x direction.
 | 
				
			||||||
 | 
					 * In the event that we start at x == 0, we actually want to wrap around and grab the cell
 | 
				
			||||||
 | 
					 * x_{-1} == (sx - 1) instead, ie. mx = (sx - 1) * dix .
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * px ([p]lus [x]) gives the index offset of the adjacent cell in the plus-x direction.
 | 
				
			||||||
 | 
					 * In the event that we start at x == (sx - 1), we actually want to wrap around and grab
 | 
				
			||||||
 | 
					 * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					{% for r in 'xyz' %}
 | 
				
			||||||
 | 
					int m{{r}} = -di{{r}};
 | 
				
			||||||
 | 
					int p{{r}} = +di{{r}};
 | 
				
			||||||
 | 
					int wrap_{{r}} = (s{{r}} - 1) * di{{r}};
 | 
				
			||||||
 | 
					if ( {{r}} == 0 ) {
 | 
				
			||||||
 | 
					  m{{r}} = wrap_{{r}};
 | 
				
			||||||
 | 
					} else if ( {{r}} == s{{r}} - 1 ) {
 | 
				
			||||||
 | 
					  p{{r}} = -wrap_{{r}};
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					{% endfor %}
 | 
				
			||||||
 | 
				
			|||||||
@ -19,6 +19,8 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
{{common_cl}}
 | 
					{{common_cl}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__global ctype *inv_mu_x = inv_mu + XX;
 | 
					__global ctype *inv_mu_x = inv_mu + XX;
 | 
				
			||||||
__global ctype *inv_mu_y = inv_mu + YY;
 | 
					__global ctype *inv_mu_y = inv_mu + YY;
 | 
				
			||||||
__global ctype *inv_mu_z = inv_mu + ZZ;
 | 
					__global ctype *inv_mu_z = inv_mu + ZZ;
 | 
				
			||||||
@ -27,32 +29,6 @@ __global char *pmc_x = pmc + XX;
 | 
				
			|||||||
__global char *pmc_y = pmc + YY;
 | 
					__global char *pmc_y = pmc + YY;
 | 
				
			||||||
__global char *pmc_z = pmc + ZZ;
 | 
					__global char *pmc_z = pmc + ZZ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * Implement periodic boundary conditions
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * ipx gives the index of the adjacent cell in the plus-x direction ([i]ndex [p]lus [x]).
 | 
					 | 
				
			||||||
 * In the event that we start at x == (sx - 1), we actually want to wrap around and grab the cell
 | 
					 | 
				
			||||||
 * where x == 0 instead, ie. ipx = i - (sx - 1) * dix .
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
int ipx, ipy, ipz;
 | 
					 | 
				
			||||||
if ( x == sx - 1 ) {
 | 
					 | 
				
			||||||
  ipx = i - (sx - 1) * dix;
 | 
					 | 
				
			||||||
} else {
 | 
					 | 
				
			||||||
  ipx = i + dix;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if ( y == sy - 1 ) {
 | 
					 | 
				
			||||||
  ipy = i - (sy - 1) * diy;
 | 
					 | 
				
			||||||
} else {
 | 
					 | 
				
			||||||
  ipy = i + diy;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if ( z == sz - 1 ) {
 | 
					 | 
				
			||||||
  ipz = i - (sz - 1) * diz;
 | 
					 | 
				
			||||||
} else {
 | 
					 | 
				
			||||||
  ipz = i + diz;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
//Update H components; set them to 0 if PMC is enabled at that location.
 | 
					//Update H components; set them to 0 if PMC is enabled at that location.
 | 
				
			||||||
//Mu division and PMC conditional are only included if {{mu}} and {{pmc}} are true
 | 
					//Mu division and PMC conditional are only included if {{mu}} and {{pmc}} are true
 | 
				
			||||||
@ -62,8 +38,8 @@ if (pmc_x[i] != 0) {
 | 
				
			|||||||
} else
 | 
					} else
 | 
				
			||||||
{%- endif -%}
 | 
					{%- endif -%}
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    ctype Dzy = mul(sub(Ez[ipy], Ez[i]), inv_dey[y]);
 | 
					    ctype Dzy = mul(sub(Ez[i + py], Ez[i]), inv_dey[y]);
 | 
				
			||||||
    ctype Dyz = mul(sub(Ey[ipz], Ey[i]), inv_dez[z]);
 | 
					    ctype Dyz = mul(sub(Ey[i + pz], Ey[i]), inv_dez[z]);
 | 
				
			||||||
    ctype x_curl = sub(Dzy, Dyz);
 | 
					    ctype x_curl = sub(Dzy, Dyz);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    {%- if mu -%}
 | 
					    {%- if mu -%}
 | 
				
			||||||
@ -79,8 +55,8 @@ if (pmc_y[i] != 0) {
 | 
				
			|||||||
} else
 | 
					} else
 | 
				
			||||||
{%- endif -%}
 | 
					{%- endif -%}
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    ctype Dxz = mul(sub(Ex[ipz], Ex[i]), inv_dez[z]);
 | 
					    ctype Dxz = mul(sub(Ex[i + pz], Ex[i]), inv_dez[z]);
 | 
				
			||||||
    ctype Dzx = mul(sub(Ez[ipx], Ez[i]), inv_dex[x]);
 | 
					    ctype Dzx = mul(sub(Ez[i + px], Ez[i]), inv_dex[x]);
 | 
				
			||||||
    ctype y_curl = sub(Dxz, Dzx);
 | 
					    ctype y_curl = sub(Dxz, Dzx);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    {%- if mu -%}
 | 
					    {%- if mu -%}
 | 
				
			||||||
@ -96,8 +72,8 @@ if (pmc_z[i] != 0) {
 | 
				
			|||||||
} else
 | 
					} else
 | 
				
			||||||
{%- endif -%}
 | 
					{%- endif -%}
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    ctype Dyx = mul(sub(Ey[ipx], Ey[i]), inv_dex[x]);
 | 
					    ctype Dyx = mul(sub(Ey[i + px], Ey[i]), inv_dex[x]);
 | 
				
			||||||
    ctype Dxy = mul(sub(Ex[ipy], Ex[i]), inv_dey[y]);
 | 
					    ctype Dxy = mul(sub(Ex[i + py], Ex[i]), inv_dey[y]);
 | 
				
			||||||
    ctype z_curl = sub(Dyx, Dxy);
 | 
					    ctype z_curl = sub(Dyx, Dxy);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    {%- if mu -%}
 | 
					    {%- if mu -%}
 | 
				
			||||||
 | 
				
			|||||||
@ -19,6 +19,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
{{common_cl}}
 | 
					{{common_cl}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__global ctype *oeps_x = oeps + XX;
 | 
					__global ctype *oeps_x = oeps + XX;
 | 
				
			||||||
__global ctype *oeps_y = oeps + YY;
 | 
					__global ctype *oeps_y = oeps + YY;
 | 
				
			||||||
@ -33,41 +34,14 @@ __global ctype *Pl_y = Pl + YY;
 | 
				
			|||||||
__global ctype *Pl_z = Pl + ZZ;
 | 
					__global ctype *Pl_z = Pl + ZZ;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					 | 
				
			||||||
 * Implement periodic boundary conditions
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * imx gives the index of the adjacent cell in the minus-x direction ([i]ndex [m]inus [x]).
 | 
					 | 
				
			||||||
 * In the event that we start at x == 0, we actually want to wrap around and grab the cell
 | 
					 | 
				
			||||||
 * where x == (sx - 1) instead, ie. imx = i + (sx - 1) * dix .
 | 
					 | 
				
			||||||
 */
 | 
					 | 
				
			||||||
int imx, imy, imz;
 | 
					 | 
				
			||||||
if ( x == 0 ) {
 | 
					 | 
				
			||||||
  imx = i + (sx - 1) * dix;
 | 
					 | 
				
			||||||
} else {
 | 
					 | 
				
			||||||
  imx = i - dix;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if ( y == 0 ) {
 | 
					 | 
				
			||||||
  imy = i + (sy - 1) * diy;
 | 
					 | 
				
			||||||
} else {
 | 
					 | 
				
			||||||
  imy = i - diy;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if ( z == 0 ) {
 | 
					 | 
				
			||||||
  imz = i + (sz - 1) * diz;
 | 
					 | 
				
			||||||
} else {
 | 
					 | 
				
			||||||
  imz = i - diz;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
//Update E components; set them to 0 if PEC is enabled there.
 | 
					//Update E components; set them to 0 if PEC is enabled there.
 | 
				
			||||||
{% if pec -%}
 | 
					{% if pec -%}
 | 
				
			||||||
if (pec_x[i] == 0)
 | 
					if (pec_x[i] == 0)
 | 
				
			||||||
{%- endif -%}
 | 
					{%- endif -%}
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    ctype tEx = mul(Ex[i], oeps_x[i]);
 | 
					    ctype tEx = mul(Ex[i], oeps_x[i]);
 | 
				
			||||||
    ctype Dzy = mul(sub(Hz[i], Hz[imy]), inv_dhy[y]);
 | 
					    ctype Dzy = mul(sub(Hz[i], Hz[i + my]), inv_dhy[y]);
 | 
				
			||||||
    ctype Dyz = mul(sub(Hy[i], Hy[imz]), inv_dhz[z]);
 | 
					    ctype Dyz = mul(sub(Hy[i], Hy[i + mz]), inv_dhz[z]);
 | 
				
			||||||
    tEx = add(tEx, sub(Dzy, Dyz));
 | 
					    tEx = add(tEx, sub(Dzy, Dyz));
 | 
				
			||||||
    Ex[i] = mul(tEx, Pl_x[i]);
 | 
					    Ex[i] = mul(tEx, Pl_x[i]);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -77,8 +51,8 @@ if (pec_y[i] == 0)
 | 
				
			|||||||
{%- endif -%}
 | 
					{%- endif -%}
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    ctype tEy = mul(Ey[i], oeps_y[i]);
 | 
					    ctype tEy = mul(Ey[i], oeps_y[i]);
 | 
				
			||||||
    ctype Dxz = mul(sub(Hx[i], Hx[imz]), inv_dhz[z]);
 | 
					    ctype Dxz = mul(sub(Hx[i], Hx[i + mz]), inv_dhz[z]);
 | 
				
			||||||
    ctype Dzx = mul(sub(Hz[i], Hz[imx]), inv_dhx[x]);
 | 
					    ctype Dzx = mul(sub(Hz[i], Hz[i + mx]), inv_dhx[x]);
 | 
				
			||||||
    tEy = add(tEy, sub(Dxz, Dzx));
 | 
					    tEy = add(tEy, sub(Dxz, Dzx));
 | 
				
			||||||
    Ey[i] = mul(tEy, Pl_y[i]);
 | 
					    Ey[i] = mul(tEy, Pl_y[i]);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -88,8 +62,8 @@ if (pec_z[i] == 0)
 | 
				
			|||||||
{%- endif -%}
 | 
					{%- endif -%}
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    ctype tEz = mul(Ez[i], oeps_z[i]);
 | 
					    ctype tEz = mul(Ez[i], oeps_z[i]);
 | 
				
			||||||
    ctype Dyx = mul(sub(Hy[i], Hy[imx]), inv_dhx[x]);
 | 
					    ctype Dyx = mul(sub(Hy[i], Hy[i + mx]), inv_dhx[x]);
 | 
				
			||||||
    ctype Dxy = mul(sub(Hx[i], Hx[imy]), inv_dhy[y]);
 | 
					    ctype Dxy = mul(sub(Hx[i], Hx[i + my]), inv_dhy[y]);
 | 
				
			||||||
    tEz = add(tEz, sub(Dyx, Dxy));
 | 
					    tEz = add(tEz, sub(Dyx, Dxy));
 | 
				
			||||||
    Ez[i] = mul(tEz, Pl_z[i]);
 | 
					    Ez[i] = mul(tEz, Pl_z[i]);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -113,7 +113,7 @@ def cg_solver(omega: complex,
 | 
				
			|||||||
        Allocate GPU memory and load in data
 | 
					        Allocate GPU memory and load in data
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
    if context is None:
 | 
					    if context is None:
 | 
				
			||||||
        context = pyopencl.create_some_context(False)
 | 
					        context = pyopencl.create_some_context(interactive=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    queue = pyopencl.CommandQueue(context)
 | 
					    queue = pyopencl.CommandQueue(context)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user