SLACKHA · andrewalferman · May 2, 2018 · May 2, 2018
diff --git a/examples/oregonator/SConscript b/examples/oregonator/SConscript
@@ -0,0 +1,23 @@
+Import('env')
+cObj = []
+c_src = Glob('*.c')
+import os
+
+FD = env['FINITE_DIFFERENCE']
+
+for src in c_src:
+    if FD and 'jacob' in str(src):
+        continue
+    cObj.append(env.Object(src, variant_dir=env['variant']))
+
+
+cudaObj = []
+if env['build_cuda']:
+    cuda_src = Glob('*.cu')
+
+    for src in cuda_src:
+        if FD and 'jacob' in str(src):
+            continue
+        cudaObj.append(env.CUDAObject(src, variant_dir=env['variant']))
+
+Return ('cObj', 'cudaObj')
diff --git a/examples/oregonator/dydt.c b/examples/oregonator/dydt.c
@@ -0,0 +1,43 @@
+/**
+ * \file
+ * \brief An implementation of the Oregonator right hand side (y' = f(y)) function.
+ *
+ * Implements the evaluation of the derivative of the state vector with respect to time, \f$\dot{\vec{y}}\f$
+ *
+ */
+
+#include "header.h"
+
+#ifdef GENERATE_DOCS
+//put this in the Oregonator namespace for documentation
+namespace oregonator {
+#endif
+
+/**
+ * \brief An implementation of the RHS of the Oregonator equation
+ * \param[in]        t         The current system time
+ * \param[in]        mu        Dummy parameter, needed for compatibility only
+ * \param[in]        y         The state vector
+ * \param[out]       dy        The output RHS (dydt) vector
+ *
+ * The `y` and `dy` vectors supplied here are local versions of the global state vectors.
+ * They have been transformed from the global Column major (Fortan) ordering to a local 1-D vector
+ * Hence the vector accesses can be done in a simple manner below, i.e. y[0] -> \f$y_1\f$, y[1] -> \f$y_2\f$, etc.
+ * @see solver_generic.c
+ */
+void dydt (const double t, const double mu, const double * __restrict__ y, double * __restrict__ dy) {
+
+  double s = 77.27;
+  double q = 8.375E-6;
+  double w = 0.161;
+
+  dy[0] = s * (y[0] - y[0] * y[1] + y[1] - (q * y[0]* y[0]));
+  dy[1] = (y[2] - y[1] - y[0] * y[1]) / s;
+  dy[2] = w * (y[0] - y[2]);
+
+} // end dydt
+
+
+#ifdef GENERATE_DOCS
+}
+#endif
diff --git a/examples/oregonator/dydt.cu b/examples/oregonator/dydt.cu
@@ -0,0 +1,47 @@
+/**
+ * \file
+ * \brief A CUDA implementation of the Oregonator right hand side (y' = f(y)) function.
+ *
+ * Implements the CUDA evaluation of the derivative of the state vector with respect to time, \f$\dot{\vec{y}}\f$
+ *
+ */
+
+#include "header.cuh"
+#include "gpu_macros.cuh"
+
+#ifdef GENERATE_DOCS
+//put this in the Oregonator namespace for documentation
+namespace oregonator_cu {
+#endif
+
+/**
+ * \brief An implementation of the RHS of the Oregonator equation
+ * \param[in]        t         The current system time
+ * \param[in]        mu        Dummy parameter, needed for compatibility only
+ * \param[in]        y         The state vector
+ * \param[out]       dy        The output RHS (dydt) vector
+ * \param[in]        d_mem     The mechanism_memory struct.  In future versions, this will be used to access the \f$\mu\f$ parameter to have a consistent interface.
+ *
+ * The `y` and `dy` vectors supplied here are the global state vectors.
+ * Hence the vector accesses must be done with the global thread ID.
+ * The gpu_macros.cuh file defines some useful macros to simplify indexing
+ * @see gpu_macros.cuh
+ */
+ __device__
+void dydt (const double t, const double mu, const double * __restrict__ y, double * __restrict__ dy,
+           const mechanism_memory * __restrict__ d_mem) {
+
+   double s = 77.27;
+   double q = 8.375E-6;
+   double w = 0.161;
+
+   dy[INDEX(0)] = s * (y[INDEX(0)] - y[INDEX(0)] * y[INDEX(1)] + y[INDEX(1)] - (q * y[INDEX(0)]* y[INDEX(0)]));
+   dy[INDEX(1)] = (y[INDEX(2)] - y[INDEX(1)] - y[INDEX(0)] * y[INDEX(1)]) / s;
+   dy[INDEX(2)] = w * (y[INDEX(0)] - y[INDEX(2)]);
+
+} // end dydt
+
+
+#ifdef GENERATE_DOCS
+}
+#endif
diff --git a/examples/oregonator/dydt.cuh b/examples/oregonator/dydt.cuh
@@ -0,0 +1,34 @@
+/**
+ * \file
+ * \brief Contains header definitions for the CUDA RHS function for the Oregonator example
+ *
+ */
+
+#ifndef DYDT_CUH
+#define DYDT_CUH
+
+#ifdef GENERATE_DOCS
+//put this in the Oregonator namespace for documentation
+namespace oregonator_cu {
+#endif
+
+
+
+/**
+ * \brief An implementation of the RHS of the Oregonator equation
+ * \param[in]        t         The current system time
+ * \param[in]        mu        Dummy parameter, needed for compatibility only
+ * \param[in]        y         The state vector
+ * \param[out]       dy        The output RHS (dydt) vector
+ * \param[in]        d_mem     The mechanism_memory struct.  In future versions, this will be used to access the \f$\mu\f$ parameter to have a consistent interface.
+ *
+ */
+__device__
+void dydt (const double t, const double mu, const double * __restrict__ y, double * __restrict__ dy,
+           const mechanism_memory * __restrict__ d_mem);
+
+#ifdef GENERATE_DOCS
+}
+#endif
+
+#endif
diff --git a/examples/oregonator/dydt.h b/examples/oregonator/dydt.h
@@ -0,0 +1,30 @@
+/**
+ * \file
+ * \brief Contains header definitions for the RHS function for the Oregonator example
+ *
+ */
+
+#ifndef DYDT_H
+#define DYDT_H
+
+#ifdef GENERATE_DOCS
+//put this in the Oregonator namespace for documentation
+namespace oregonator {
+#endif
+
+
+
+/**
+ * \brief An implementation of the RHS of the Oregonator equation
+ * \param[in]        t         The current system time
+ * \param[in]        mu        Dummy parameter, needed for compatibility only
+ * \param[in]        y         The state vector
+ * \param[out]       dy        The output RHS (dydt) vector
+ */
+void dydt (const double t, const double mu, const double * __restrict__ y, double * __restrict__ dy);
+
+#ifdef GENERATE_DOCS
+}
+#endif
+
+#endif
diff --git a/examples/oregonator/gpu_macros.cuh b/examples/oregonator/gpu_macros.cuh
@@ -0,0 +1,41 @@
+/**
+ * \file
+ * \brief Defines some simple macros to simplify GPU indexing
+ *
+ */
+
+#ifndef GPU_MACROS_CUH
+#define GPU_MACROS_CUH
+#include <stdio.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <helper_cuda.h>
+
+#ifdef GENERATE_DOCS
+//put this in the Oregonator namespace for documentation
+namespace oregonator_cu {
+#endif
+
+//! The total number of threads in the Grid, provides an offset between vector entries
+#define GRID_DIM (blockDim.x * gridDim.x)
+//! The global CUDA thread index
+#define T_ID (threadIdx.x + blockIdx.x * blockDim.x)
+//! Convenience macro to get the value of a vector at index i, calculated as i * #GRID_DIM + #T_ID
+#define INDEX(i) (T_ID + (i) * GRID_DIM)
+
+#define cudaErrorCheck(ans) { gpuAssert((ans), __FILE__, __LINE__); }
+inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
+{
+    if (code != cudaSuccess)
+    {
+        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+        if (abort) exit(code);
+    }
+}
+
+
+#ifdef GENERATE_DOCS
+}
+#endif
+
+#endif
diff --git a/examples/oregonator/gpu_memory.cu b/examples/oregonator/gpu_memory.cu
@@ -0,0 +1,68 @@
+/*!
+ * \file
+ * \brief Initializes and calculates required GPU memory
+ */
+
+#include "gpu_memory.cuh"
+
+#ifdef GENERATE_DOCS
+//put this in the Oregonator namespace for documentation
+namespace oregonator_cu {
+#endif
+
+/**
+ * \brief Calculates and returns the total memory size (in bytes) required by an individual thread for the
+ *        mechanism_memory struct.
+ */
+size_t required_mechanism_size() {
+  //returns the total required size for the mechanism per thread
+  size_t mech_size = 0;
+  //state vector y
+  mech_size += NSP;
+  //dydt vector
+  mech_size += NSP;
+  //Jacobian
+  mech_size += NSP * NSP;
+  //and mu parameter
+  mech_size += 1;
+  return mech_size * sizeof(double);
+}
+/**
+ * \brief Initializes the host and device mechanism_memory structs.  This is required in order to enable
+ *        passing the struct to CUDA
+ * \param[in]               padded              The padded number of threads to be used by the CUDA solver
+ * \param[in,out]           h_mem               The host version of the mechanism_memory struct to initialize.
+ * \param[in,out]           d_mem               The device version of the mechanism_memory struct to copy the resulting host mechanism_memory struct to.
+ */
+void initialize_gpu_memory(int padded, mechanism_memory** h_mem, mechanism_memory** d_mem)
+{
+  // Allocate storage for the device struct
+  cudaErrorCheck( cudaMalloc(d_mem, sizeof(mechanism_memory)) );
+  //allocate the device arrays on the host pointer
+  cudaErrorCheck( cudaMalloc(&((*h_mem)->y), NSP * padded * sizeof(double)) );
+  cudaErrorCheck( cudaMalloc(&((*h_mem)->dy), NSP * padded * sizeof(double)) );
+  cudaErrorCheck( cudaMalloc(&((*h_mem)->var), 1 * padded * sizeof(double)) );
+  cudaErrorCheck( cudaMalloc(&((*h_mem)->jac), NSP * NSP * padded * sizeof(double)) );
+  // set non-initialized values to zero
+  cudaErrorCheck( cudaMemset((*h_mem)->dy, 0, NSP * padded * sizeof(double)) );
+  cudaErrorCheck( cudaMemset((*h_mem)->jac, 0, NSP * NSP * padded * sizeof(double)) );
+  // and copy to device pointer
+  cudaErrorCheck( cudaMemcpy(*d_mem, *h_mem, sizeof(mechanism_memory), cudaMemcpyHostToDevice) );
+}
+/**
+ * \brief Frees the host and device mechanism_memory structs
+ * \param[in,out]           h_mem               The host version of the mechanism_memory struct.
+ * \param[in,out]           d_mem               The device version of the mechanism_memory struct.
+ */
+void free_gpu_memory(mechanism_memory** h_mem, mechanism_memory** d_mem)
+{
+  cudaErrorCheck(cudaFree((*h_mem)->y));
+  cudaErrorCheck(cudaFree((*h_mem)->dy));
+  cudaErrorCheck(cudaFree((*h_mem)->var));
+  cudaErrorCheck(cudaFree((*h_mem)->jac));
+  cudaErrorCheck(cudaFree(*d_mem));
+}
+
+#ifdef GENERATE_DOCS
+}
+#endif
diff --git a/examples/oregonator/gpu_memory.cuh b/examples/oregonator/gpu_memory.cuh
@@ -0,0 +1,66 @@
+/*!
+ * \file
+ * \brief Headers for GPU memory initialization
+ */
+
+<<<<<<< HEAD
+=======
+#ifndef GPU_MEM_CUH
+#define GPU_MEM_CUH
+
+>>>>>>> master
+#include "header.cuh"
+
+#ifdef GENERATE_DOCS
+//put this in the Oregonator namespace for documentation
+namespace oregonator_cu {
+#endif
+
+/**
+ * \brief This struct is used to store memory for the CUDA RHS and Jacobian evaluation.
+ *        Along with the solver_memory struct, this must be initialized and passed to the solvers
+ *        the run.  @see solver_memory, initialize_gpu_memory, required_mechanism_size
+ *
+ * \param           y               The global state vector arrays
+ * \param           dy              The global dydt vector arrays
+ * \param           var             The global param array [Used for \f$\mu\f$]
+ * \param           jac             The global Jacobian arrays
+ *
+ * This is also heavily used when using with @pyJac to hold additional arrays
+ * for evaluating chemical kinetic reactions.
+ */
+struct mechanism_memory {
+  double * y;
+  double * dy;
+  double * var;
+  double * jac;
+};
+
+/**
+ * \brief Calculates and returns the total memory size (in bytes) required by an individual thread for the
+ *        mechanism_memory struct.
+ */
+size_t required_mechanism_size();
+/**
+ * \brief Initializes the host and device mechanism_memory structs.  This is required in order to enable
+ *        passing the struct to CUDA
+ * \param[in]               padded              The padded number of threads to be used by the CUDA solver
+ * \param[in,out]           h_mem               The host version of the mechanism_memory struct to initialize.
+ * \param[in,out]           d_mem               The device version of the mechanism_memory struct to copy the resulting host mechanism_memory struct to.
+ */
+void initialize_gpu_memory(int padded, mechanism_memory** h_mem, mechanism_memory** d_mem);
+/**
+ * \brief Frees the host and device mechanism_memory structs
+ * \param[in,out]           h_mem               The host version of the mechanism_memory struct.
+ * \param[in,out]           d_mem               The device version of the mechanism_memory struct.
+ */
+void free_gpu_memory(mechanism_memory** h_mem, mechanism_memory** d_mem);
+
+#ifdef GENERATE_DOCS
+}
+#endif
+<<<<<<< HEAD
+=======
+
+#endif
+>>>>>>> master