From b372130c6b91030a7cc56842a07de569b792bec4 Mon Sep 17 00:00:00 2001
From: jan <jan@orange.mpxd.net>
Date: Sun, 16 Apr 2017 02:51:09 -0700
Subject: [PATCH] try out opencl image3d_t... it runs in the base case, no idea
 if correctly

---
 fdtd.py                |  4 +--
 fdtd/kernels/common.cl | 36 +++-----------------------
 fdtd/simulation.py     | 59 ++++++++++++++++++++++++++----------------
 3 files changed, 42 insertions(+), 57 deletions(-)

diff --git a/fdtd.py b/fdtd.py
index bbdabec..d41a038 100644
--- a/fdtd.py
+++ b/fdtd.py
@@ -122,7 +122,7 @@ def main():
 
     print('grid shape: {}'.format(grid.shape))
     # #### Create the simulation grid ####
-    sim = Simulation(grid.grids, do_poynting=True, pml_thickness=8)
+    sim = Simulation(grid.grids, do_poynting=False, pmls=[])
 
     # Source parameters and function
     w = 2 * numpy.pi * dx / wl
@@ -150,7 +150,7 @@ def main():
         sim.update_E([]).wait()
 
         ind = numpy.ravel_multi_index(tuple(grid.shape//2), dims=grid.shape, order='C') + numpy.prod(grid.shape)
-        sim.E[ind] += field_source(t)
+#        sim.buf[ind] += field_source(t)
         e = sim.update_H([])
         if sim.update_S:
             e = sim.update_S([e])
diff --git a/fdtd/kernels/common.cl b/fdtd/kernels/common.cl
index d203ca7..e43336b 100644
--- a/fdtd/kernels/common.cl
+++ b/fdtd/kernels/common.cl
@@ -2,14 +2,10 @@
 /* Common code for E, H updates
  *
  * Template parameters:
- *  ftype	type name (e.g. float or double)
  *  shape       list of 3 ints specifying shape of fields
  */
 #}
 
-typedef {{ftype}} ftype;
-
-
 /*
  * Field size info
  */
@@ -18,13 +14,6 @@ const size_t sy = {{shape[1]}};
 const size_t sz = {{shape[2]}};
 const size_t field_size = sx * sy * sz;
 
-//Since we use i to index into Ex[], Ey[], ... rather than E[], do nothing if
-// i is outside the bounds of Ex[].
-if (i >= field_size) {
-    PYOPENCL_ELWISE_CONTINUE;
-}
-
-
 /*
  * Array indexing
  */
@@ -42,25 +31,6 @@ const size_t diy = sz;
 const size_t diz = 1;
 
 
-/*
- * Pointer math
- */
-//Pointer offsets into the components of a linearized vector-field
-// (eg. Hx = H + XX, where H and Hx are pointers)
-const size_t XX = 0;
-const size_t YY = field_size;
-const size_t ZZ = field_size * 2;
-
-//Define pointers to vector components of each field (eg. Hx = H + XX)
-__global ftype *Ex = E + XX;
-__global ftype *Ey = E + YY;
-__global ftype *Ez = E + ZZ;
-
-__global ftype *Hx = H + XX;
-__global ftype *Hy = H + YY;
-__global ftype *Hz = H + ZZ;
-
-
 /*
  * Implement periodic boundary conditions
  *
@@ -73,9 +43,9 @@ __global ftype *Hz = H + ZZ;
  * the cell x_{+1} == 0 instead, ie. px = -(sx - 1) * dix .
  */
 {% for r in 'xyz' %}
-int m{{r}} = -di{{r}};
-int p{{r}} = +di{{r}};
-int wrap_{{r}} = (s{{r}} - 1) * di{{r}};
+int m{{r}} = -1;
+int p{{r}} = +1;
+int wrap_{{r}} = s{{r}} - 1;
 if ( {{r}} == 0 ) {
   m{{r}} = wrap_{{r}};
 } else if ( {{r}} == s{{r}} - 1 ) {
diff --git a/fdtd/simulation.py b/fdtd/simulation.py
index f1a8dec..9ba9b88 100644
--- a/fdtd/simulation.py
+++ b/fdtd/simulation.py
@@ -17,6 +17,7 @@ from fdfd_tools import vec
 
 __author__ = 'Jan Petykiewicz'
 
+float4 = pyopencl.array.vec.float4
 
 # Create jinja2 env on module load
 jinja_env = jinja2.Environment(loader=jinja2.PackageLoader(__name__, 'kernels'))
@@ -88,12 +89,23 @@ class Simulation(object):
         else:
             self.dt = dt
 
+        def make4d(f):
+            g = [numpy.transpose(fi)[:,:,:,None] for fi in f]
+            h = g + [numpy.empty_like(g[0])]
+            j = numpy.concatenate(h, axis=3)
+            return numpy.ascontiguousarray(j, dtype=numpy.float32) 
+
         self.arg_type = float_type
         self.sources = {}
-        self.eps = pyopencl.array.to_device(self.queue, vec(epsilon).astype(float_type))
+        ef = make4d(epsilon).astype(float_type)
+        self.eps = pyopencl.image_from_array(self.context,
+                make4d(epsilon), num_channels=4, mode='r')
+
+        self.buf = pyopencl.array.Array(self.queue, shape=epsilon.shape, dtype=float4)
 
         if initial_E is None:
-            self.E = pyopencl.array.zeros_like(self.eps)
+            self.E = pyopencl.image_from_array(self.context,
+                make4d(epsilon) * 0, num_channels=4, mode='r')
         else:
             if len(initial_E) != 3:
                 Exception('Initial_E must be a list of length 3')
@@ -102,7 +114,8 @@ class Simulation(object):
             self.E = pyopencl.array.to_device(self.queue, vec(E).astype(float_type))
 
         if initial_H is None:
-            self.H = pyopencl.array.zeros_like(self.eps)
+            self.H = pyopencl.image_from_array(self.context,
+                make4d(epsilon) * 0, num_channels=4, mode='r')
         else:
             if len(initial_H) != 3:
                 Exception('Initial_H must be a list of length 3')
@@ -119,15 +132,8 @@ class Simulation(object):
             return ctype + ' *' + arg
 
         base_fields = OrderedDict()
-        base_fields[ptr('E')] = self.E
-        base_fields[ptr('H')] = self.H
-        base_fields[ctype + ' dt'] = self.dt
-
-        eps_field = OrderedDict()
-        eps_field[ptr('eps')] = self.eps
 
         common_source = jinja_env.get_template('common.cl').render(
-                ftype=ctype,
                 shape=epsilon[0].shape,
                 )
         jinja_args = {
@@ -136,8 +142,8 @@ class Simulation(object):
                 'pmls': pmls,
                 'do_poynting': do_poynting,
                 }
-        E_source = jinja_env.get_template('update_e.cl').render(**jinja_args)
-        H_source = jinja_env.get_template('update_h.cl').render(**jinja_args)
+        E_source = jinja_env.get_template('update_e_full.cl').render(**jinja_args)
+        H_source = jinja_env.get_template('update_h_full.cl').render(**jinja_args)
 
         self.sources['E'] = E_source
         self.sources['H'] = H_source
@@ -198,17 +204,26 @@ class Simulation(object):
         '''
         Create operations
         '''
-        E_args = OrderedDict()
-        [E_args.update(d) for d in (base_fields, eps_field, pml_e_fields)]
-        E_update = ElementwiseKernel(self.context, operation=E_source,
-                                     arguments=', '.join(E_args.keys()))
+        E_update = pyopencl.Program(self.context, E_source).build().update_e
+        H_update = pyopencl.Program(self.context, H_source).build().update_h
+        max_gs = E_update.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE,
+                                      self.queue.device)
+        gs, ls = self.buf.get_sizes(self.queue, max_gs)
+        print('gs', gs, ls, max_gs)
+        def update_E(e):
+            e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.E, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e)
+            e = E_update(self.queue, gs, ls, self.buf.data, self.H, numpy.float32(dt), self.eps, numpy.uint32(self.buf.size), wait_for=[e])
+            return e
 
-        H_args = OrderedDict()
-        [H_args.update(d) for d in (base_fields, pml_h_fields, S_fields)]
-        H_update = ElementwiseKernel(self.context, operation=H_source,
-                                     arguments=', '.join(H_args.keys()))
-        self.update_E = lambda e: E_update(*E_args.values(), wait_for=e)
-        self.update_H = lambda e: H_update(*H_args.values(), wait_for=e)
+        def update_H(e):
+            e = pyopencl.enqueue_copy(self.queue, self.E, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=e)
+            e = pyopencl.enqueue_copy(self.queue, self.buf.data, self.H, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e])
+            e = H_update(self.queue, gs, ls, self.E, self.buf.data, numpy.float32(dt), numpy.uint32(self.buf.size), wait_for=[e])
+            e = pyopencl.enqueue_copy(self.queue, self.H, self.buf.data, offset=0, origin=(0,0,0), region=epsilon[0].shape, wait_for=[e])
+            return e
+
+        self.update_E = update_E
+        self.update_H = update_H
 
         if do_poynting:
             S_args = OrderedDict()