From b372130c6b91030a7cc56842a07de569b792bec4 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 16 Apr 2017 02:51:09 -0700 Subject: [PATCH] try out opencl image3d_t... it runs in the base case, no idea if correctly --- fdtd.py | 4 +-- fdtd/kernels/common.cl | 36 +++----------------------- fdtd/simulation.py | 59 ++++++++++++++++++++++++++---------------- 3 files changed, 42 insertions(+), 57 deletions(-) diff --git a/fdtd.py b/fdtd.py index bbdabec..d41a038 100644 --- a/fdtd.py +++ b/fdtd.py @@ -122,7 +122,7 @@ def main(): print('grid shape: {}'.format(grid.shape)) # #### Create the simulation grid #### - sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8) + sim = Simulation(grid.grids, do_poynting=False, pmls=[]) # Source parameters and function w = 2 * numpy.pi * dx / wl @@ -150,7 +150,7 @@ def main(): sim.update_E([]).wait() ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape) - sim.E[ind] += field_source(t) +# sim.buf[ind] += field_source(t) e = sim.update_H([]) if sim.update_S: e = sim.update_S([e]) diff --git a/fdtd/kernels/common.cl b/fdtd/kernels/common.cl index d203ca7..e43336b 100644 --- a/fdtd/kernels/common.cl +++ b/fdtd/kernels/common.cl @@ -2,14 +2,10 @@ /* Common code for E, H updates * * Template parameters: - * ftype type name (e.g. float or double) * shape list of 3 ints specifying shape of fields */ #} -typedef {{ftype}} ftype; - - /* * Field size info */ @@ -18,13 +14,6 @@ const size_t sy = {{shape[1]}}; const size_t sz = {{shape[2]}}; const size_t field_size = sx * sy * sz; -//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if -// i is outside the bounds of Ex[]. -if (i >= field_size) { - PYOPENCL_ELWISE_CONTINUE; -} - - /* * Array indexing */ @@ -42,25 +31,6 @@ const size_t diy = sz; const size_t diz = 1; -/* - * Pointer math - */ -//Pointer offsets into the components of a linearized vector-field -// (eg. Hx = H + XX, where H and Hx are pointers) -const size_t XX = 0; -const size_t YY = field_size; -const size_t ZZ = field_size * 2; - -//Define pointers to vector components of each field (eg. Hx = H + XX) -__global ftype *Ex = E + XX; -__global ftype *Ey = E + YY; -__global ftype *Ez = E + ZZ; - -__global ftype *Hx = H + XX; -__global ftype *Hy = H + YY; -__global ftype *Hz = H + ZZ; - - /* * Implement periodic boundary conditions * @@ -73,9 +43,9 @@ __global ftype *Hz = H + ZZ; * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix . */ {% for r in 'xyz' %} -int m{{r}} = -di{{r}}; -int p{{r}} = +di{{r}}; -int wrap_{{r}} = (s{{r}} - 1) * di{{r}}; +int m{{r}} = -1; +int p{{r}} = +1; +int wrap_{{r}} = s{{r}} - 1; if ( {{r}} == 0 ) { m{{r}} = wrap_{{r}}; } else if ( {{r}} == s{{r}} - 1 ) { diff --git a/fdtd/simulation.py b/fdtd/simulation.py index f1a8dec..9ba9b88 100644 --- a/fdtd/simulation.py +++ b/fdtd/simulation.py @@ -17,6 +17,7 @@ from fdfd_tools import vec __author__ = 'Jan Petykiewicz' +float4 = pyopencl.array.vec.float4 # Create jinja2 env on module load jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels')) @@ -88,12 +89,23 @@ class Simulation(object): else: self.dt = dt + def make4d(f): + g = [numpy.transpose(fi)[:,:,:,None] for fi in f] + h = g + [numpy.empty_like(g[0])] + j = numpy.concatenate(h, axis=3) + return numpy.ascontiguousarray(j, dtype=numpy.float32) + self.arg_type = float_type self.sources = {} - self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type)) + ef = make4d(epsilon).astype(float_type) + self.eps = pyopencl.image_from_array(self.context, + make4d(epsilon), num_channels=4, mode='r') + + self.buf = pyopencl.array.Array(self.queue, shape=epsilon.shape, dtype=float4) if initial_E is None: - self.E = pyopencl.array.zeros_like(self.eps) + self.E = pyopencl.image_from_array(self.context, + make4d(epsilon) * 0, num_channels=4, mode='r') else: if len(initial_E) != 3: Exception('Initial_E must be a list of length 3') @@ -102,7 +114,8 @@ class Simulation(object): self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type)) if initial_H is None: - self.H = pyopencl.array.zeros_like(self.eps) + self.H = pyopencl.image_from_array(self.context, + make4d(epsilon) * 0, num_channels=4, mode='r') else: if len(initial_H) != 3: Exception('Initial_H must be a list of length 3') @@ -119,15 +132,8 @@ class Simulation(object): return ctype + ' *' + arg base_fields = OrderedDict() - base_fields[ptr('E')] = self.E - base_fields[ptr('H')] = self.H - base_fields[ctype + ' dt'] = self.dt - - eps_field = OrderedDict() - eps_field[ptr('eps')] = self.eps common_source = jinja_env.get_template('common.cl').render( - ftype=ctype, shape=epsilon[0].shape, ) jinja_args = { @@ -136,8 +142,8 @@ class Simulation(object): 'pmls': pmls, 'do_poynting': do_poynting, } - E_source = jinja_env.get_template('update_e.cl').render(**jinja_args) - H_source = jinja_env.get_template('update_h.cl').render(**jinja_args) + E_source = jinja_env.get_template('update_e_full.cl').render(**jinja_args) + H_source = jinja_env.get_template('update_h_full.cl').render(**jinja_args) self.sources['E'] = E_source self.sources['H'] = H_source @@ -198,17 +204,26 @@ class Simulation(object): ''' Create operations ''' - E_args = OrderedDict() - [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)] - E_update = ElementwiseKernel(self.context, operation=E_source, - arguments=', '.join(E_args.keys())) + E_update = pyopencl.Program(self.context, E_source).build().update_e + H_update = pyopencl.Program(self.context, H_source).build().update_h + max_gs = E_update.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, + self.queue.device) + gs, ls = self.buf.get_sizes(self.queue, max_gs) + print('gs', gs, ls, max_gs) + def update_E(e): + e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.E, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e) + e = E_update(self.queue, gs, ls, self.buf.data, self.H, numpy.float32(dt), self.eps, numpy.uint32(self.buf.size), wait_for=[e]) + return e - H_args = OrderedDict() - [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)] - H_update = ElementwiseKernel(self.context, operation=H_source, - arguments=', '.join(H_args.keys())) - self.update_E = lambda e: E_update(*E_args.values(), wait_for=e) - self.update_H = lambda e: H_update(*H_args.values(), wait_for=e) + def update_H(e): + e = pyopencl.enqueue_copy(self.queue, self.E, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e) + e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.H, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e]) + e = H_update(self.queue, gs, ls, self.E, self.buf.data, numpy.float32(dt), numpy.uint32(self.buf.size), wait_for=[e]) + e = pyopencl.enqueue_copy(self.queue, self.H, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e]) + return e + + self.update_E = update_E + self.update_H = update_H if do_poynting: S_args = OrderedDict()