From 1627d0da052f24f85838c3a70a9c1534692bc163 Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Mon, 15 Jul 2019 00:11:09 -0700 Subject: [PATCH 1/3] Consolidate poynting code into update_h --- opencl_fdtd/kernels/update_h.cl | 56 +++++++++++++++++++++------------ opencl_fdtd/simulation.py | 12 ++----- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index 187a18a..a709486 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -6,13 +6,14 @@ * common_header: Rendered contents of common.cl * pmls: [{'axis': 'x', 'polarity': 'n', 'thickness': 8}, ...] list of pml dicts containing * axes, polarities, and thicknesses. - * do_poynting: Whether to precalculate poynting vector components (boolean) + * do_poynting: Whether to calculate poynting vector (boolean) + * do_poynting_halves: Whether to calculate half-step poynting vectors (boolean) * uniform_dx: If grid is uniform, uniform_dx should be the grid spacing. * Otherwise, uniform_dx should be False and [inv_de{xyz}] arrays must be supplied as * OpenCL args. * * OpenCL args: - * E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [oS] + * E, H, dt, [inv_de{xyz}], [p{xyz}{01}h{np}, Psi_{xyz}{np}_H], [S], [S0, S1] */ {{common_header}} @@ -52,7 +53,7 @@ if ({{r}} == s{{r}} - 1) { dE{{u ~ r}} = bloch_re * dE{{u ~ r}} + bloch_im * (F{{u}}[i + p{{u}}] - F{{u}}[i]); dE{{v ~ r}} = bloch_re * dE{{v ~ r}} + bloch_im * (F{{v}}[i + p{{v}}] - F{{v}}[i]); } -{% endfor -%} +{%- endfor %} {%- if do_poynting %} @@ -118,7 +119,7 @@ if ( s{{r}} > {{r}} && {{r}} >= s{{r}} - pml_{{r ~ p}}_thickness ) { /* * Update H */ -{% if do_poynting -%} +{% if do_poynting or do_poynting_halves -%} // Save old H for averaging ftype Hx_old = Hx[i]; ftype Hy_old = Hy[i]; @@ -131,25 +132,40 @@ Hy[i] -= dt * (dExz - dEzx - pHyi); Hz[i] -= dt * (dEyx - dExy - pHzi); {% if do_poynting -%} +/* + * Calculate unscaled S components at ??? locations + * //TODO: document S locations and types + */ +__global ftype *Sx = S + XX; +__global ftype *Sy = S + YY; +__global ftype *Sz = S + ZZ; + // Average H across timesteps ftype aHxt = Hx[i] + Hx_old; ftype aHyt = Hy[i] + Hy_old; ftype aHzt = Hz[i] + Hz_old; -/* - * Calculate unscaled S components at H locations - */ -__global ftype *oSxy = oS + 0 * field_size; -__global ftype *oSyz = oS + 1 * field_size; -__global ftype *oSzx = oS + 2 * field_size; -__global ftype *oSxz = oS + 3 * field_size; -__global ftype *oSyx = oS + 4 * field_size; -__global ftype *oSzy = oS + 5 * field_size; - -oSxy[i] = aEyx * aHzt; -oSxz[i] = -aEzx * aHyt; -oSyz[i] = aEzy * aHxt; -oSyx[i] = -aExy * aHzt; -oSzx[i] = aExz * aHyt; -oSzy[i] = -aEyz * aHxt; +Sx[i] = Ey[i + px] * aHzt - Ez[i + px] * aHyt; +Sy[i] = Ez[i + py] * aHxt - Ex[i + py] * aHzt; +Sz[i] = Ex[i + pz] * aHyt - Ey[i + pz] * aHxt; +{%- endif -%} + +{% if do_poynting_halves -%} +/* + * Calculate unscaled S components at ??? locations + * //TODO: document S locations and types + */ +__global ftype *Sx0 = S0 + XX; +__global ftype *Sy0 = S0 + YY; +__global ftype *Sz0 = S0 + ZZ; +__global ftype *Sx1 = S1 + XX; +__global ftype *Sy1 = S1 + YY; +__global ftype *Sz1 = S1 + ZZ; + +Sx0[i] = Ey[i + px] * Hz_old - Ez[i + px] * Hy_old; +Sy0[i] = Ez[i + py] * Hx_old - Ex[i + py] * Hz_old; +Sz0[i] = Ex[i + pz] * Hy_old - Ey[i + pz] * Hx_old; +Sx1[i] = Ey[i + px] * Hz[i] - Ez[i + px] * Hy[i]; +Sy1[i] = Ez[i + py] * Hx[i] - Ex[i + py] * Hz[i]; +Sz1[i] = Ex[i + pz] * Hy[i] - Ey[i + pz] * Hx[i]; {%- endif -%} diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 03b5588..5faea22 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -26,7 +26,7 @@ class Simulation(object): """ Constructs and holds the basic FDTD operations and related fields - After constructing this object, call the (update_E, update_H, update_S) members + After constructing this object, call the (update_E, update_H) members to perform FDTD updates on the stored (E, H, S) fields: pmls = [{'axis': a, 'polarity': p} for a in 'xyz' for p in 'np'] @@ -43,8 +43,6 @@ class Simulation(object): # Perturb the field (i.e., add a soft current source) sim.E[ind] += numpy.sin(omega * t * sim.dt) event = sim.update_H([]) - if sim.update_S: - event = sim.update_S([event]) event.wait() with lzma.open('saved_simulation', 'wb') as f: @@ -112,6 +110,7 @@ class Simulation(object): :param float_type: numpy.float32 or numpy.float64. Default numpy.float32. :param do_poynting: If true, enables calculation of the poynting vector, S. Poynting vector calculation adds the following computational burdens: + ****INACCURATE, TODO FIXME***** * During update_H, ~6 extra additions/cell are performed in order to spatially average E and temporally average H. These quantities are multiplied (6 multiplications/cell) and then stored (6 writes/cell, cache-friendly). @@ -226,12 +225,7 @@ class Simulation(object): S_fields = OrderedDict() if do_poynting: - S_source = jinja_env.get_template('update_s.cl').render(**jinja_args) - self.sources['S'] = S_source - - self.oS = pyopencl.array.zeros(self.queue, self.E.shape + (2,), dtype=self.arg_type) self.S = pyopencl.array.zeros_like(self.E) - S_fields[ptr('oS')] = self.oS S_fields[ptr('S')] = self.S J_fields = OrderedDict() @@ -257,8 +251,6 @@ class Simulation(object): ''' self.update_E = self._create_operation(E_source, (base_fields, eps_field, pml_e_fields)) self.update_H = self._create_operation(H_source, (base_fields, pml_h_fields, S_fields)) - if do_poynting: - self.update_S = self._create_operation(S_source, (base_fields, S_fields)) if bloch_boundaries: self.update_F = self._create_operation(F_source, (bloch_fields, eps_field, pml_f_fields)) self.update_G = self._create_operation(G_source, (bloch_fields, pml_g_fields)) From 89d99187b6153c21229a1cf00e12b17100ec67be Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Wed, 17 Jul 2019 00:52:31 -0700 Subject: [PATCH 2/3] Remove unused code --- opencl_fdtd/kernels/update_h.cl | 17 ---------------- opencl_fdtd/kernels/update_s.cl | 36 --------------------------------- 2 files changed, 53 deletions(-) delete mode 100644 opencl_fdtd/kernels/update_s.cl diff --git a/opencl_fdtd/kernels/update_h.cl b/opencl_fdtd/kernels/update_h.cl index a709486..a8a6b04 100644 --- a/opencl_fdtd/kernels/update_h.cl +++ b/opencl_fdtd/kernels/update_h.cl @@ -55,23 +55,6 @@ if ({{r}} == s{{r}} - 1) { } {%- endfor %} -{%- if do_poynting %} - - -/* - * Precalculate averaged E - */ -ftype aExy = Ex[i + py] + Ex[i]; -ftype aExz = Ex[i + pz] + Ex[i]; - -ftype aEyx = Ey[i + px] + Ey[i]; -ftype aEyz = Ey[i + pz] + Ey[i]; - -ftype aEzx = Ez[i + px] + Ez[i]; -ftype aEzy = Ez[i + py] + Ez[i]; -{%- endif %} - - /* diff --git a/opencl_fdtd/kernels/update_s.cl b/opencl_fdtd/kernels/update_s.cl deleted file mode 100644 index 94e061e..0000000 --- a/opencl_fdtd/kernels/update_s.cl +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Update E-field, including any PMLs. - * - * Template parameters: - * common_header: Rendered contents of common.cl - * pmls: [('x', 'n'), ('z', 'p'),...] list of pml axes and polarities - * pml_thickness: Number of cells (integer) - * - * OpenCL args: - * E, H, dt, S, oS - */ - -{{common_header}} - -////////////////////////////////////////////////////////////////////// - - -/* - * Calculate S from oS (pre-calculated components) - */ -__global ftype *Sx = S + XX; -__global ftype *Sy = S + YY; -__global ftype *Sz = S + ZZ; - -// Use unscaled S components from H locations -__global ftype *oSxy = oS + 0 * field_size; -__global ftype *oSyz = oS + 1 * field_size; -__global ftype *oSzx = oS + 2 * field_size; -__global ftype *oSxz = oS + 3 * field_size; -__global ftype *oSyx = oS + 4 * field_size; -__global ftype *oSzy = oS + 5 * field_size; - -ftype s_factor = dt * 0.125; -Sx[i] = (oSxy[i] + oSxz[i] + oSxy[i + my] + oSxz[i + mz]) * s_factor; -Sy[i] = (oSyz[i] + oSyx[i] + oSyz[i + mz] + oSyx[i + mx]) * s_factor; -Sz[i] = (oSzx[i] + oSzy[i] + oSzx[i + mx] + oSzy[i + my]) * s_factor; From cf0db63a3ff7b8d1068fc0b7b5e6b709b669278b Mon Sep 17 00:00:00 2001 From: Jan Petykiewicz Date: Wed, 17 Jul 2019 00:53:55 -0700 Subject: [PATCH 3/3] Revert "Add restrict keyword to pointers (not sharing the same memory for multiple fields)" This reverts commit 60b70bb332d29fb5a2aecb801517be659cf4daf9. It appears to have minimal performance impact, and fails to compile on nvidia cards. --- opencl_fdtd/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl_fdtd/simulation.py b/opencl_fdtd/simulation.py index 5faea22..2f919cd 100644 --- a/opencl_fdtd/simulation.py +++ b/opencl_fdtd/simulation.py @@ -169,7 +169,7 @@ class Simulation(object): ctype = type_to_C(self.arg_type) def ptr(arg: str) -> str: - return ctype + ' * restrict ' + arg + return ctype + ' *' + arg base_fields = OrderedDict() base_fields[ptr('E')] = self.E