accelerInt/exp4__init_8cu_source.html

 #include "rational_approximant.cuh"
 #include "solver_options.cuh"
 #include "solver_props.cuh"
 #include "gpu_macros.cuh"

 #ifdef GENERATE_DOCS
 namespace exp4cu {
 #endif

 /**
  * \brief Allocates a device buffer and clears it to zero.
  *
  * \param[out] ptr   Receives the address of the buffer returned by cudaMalloc.
  * \param[in]  size  Number of bytes to allocate and to zero.
  *
  * The status of each CUDA call is handed to cudaErrorCheck.
  */
 void createAndZero(void** ptr, size_t size)
 {
   // allocate first ...
   const cudaError_t alloc_status = cudaMalloc(ptr, size);
   cudaErrorCheck(alloc_status);
   // ... then clear every byte of the fresh allocation
   const cudaError_t clear_status = cudaMemset(*ptr, 0, size);
   cudaErrorCheck(clear_status);
 }

 /**
  * \brief Allocates and zeroes the solver's device work arrays and copies the
  *        resulting pointer table to the device.
  *
  * \param[in]     padded  Number of padded ODE slots; every array is sized as a
  *                        multiple of this value.
  * \param[in,out] h_mem   Host-side solver_memory struct (must already exist);
  *                        its members are filled with device pointers.
  * \param[out]    d_mem   Receives a device allocation holding a copy of *h_mem.
  */
 void initialize_solver(int padded, solver_memory** h_mem, solver_memory** d_mem) {
     find_poles_and_residuals();

     // Do all size arithmetic in size_t. Expressions such as
     // STRIDE * STRIDE * padded were previously evaluated in int before being
     // widened by sizeof(...), which overflows for large thread counts.
     const size_t n = (size_t)padded;
     const size_t nsp = (size_t)(NSP);
     const size_t stride = (size_t)(STRIDE);

     const size_t nsp_doubles = n * nsp * sizeof(double);
     const size_t stride_doubles = n * stride * sizeof(double);
     const size_t stride_sq_doubles = n * stride * stride * sizeof(double);

     // Allocate storage for the device struct
     cudaErrorCheck( cudaMalloc(d_mem, sizeof(solver_memory)) );

     //allocate the device arrays on the host pointer
     solver_memory* host = *h_mem;
     createAndZero((void**)&(host->sc), nsp_doubles);
     createAndZero((void**)&(host->work1), stride_doubles);
     createAndZero((void**)&(host->work2), stride_doubles);
     createAndZero((void**)&(host->work3), stride_doubles);
     createAndZero((void**)&(host->work4), n * stride * sizeof(cuDoubleComplex));
     createAndZero((void**)&(host->Hm), stride_sq_doubles);
     createAndZero((void**)&(host->phiHm), stride_sq_doubles);
     createAndZero((void**)&(host->Vm), n * nsp * stride * sizeof(double));
     // NOTE(review): required_solver_size() budgets STRIDE ints per thread for
     // the pivot array while NSP ints are allocated here - confirm which is intended.
     createAndZero((void**)&(host->ipiv), n * nsp * sizeof(int));
     createAndZero((void**)&(host->invA), n * stride * stride * sizeof(cuDoubleComplex));
     createAndZero((void**)&(host->result), n * sizeof(int));
     createAndZero((void**)&(host->k1), nsp_doubles);
     createAndZero((void**)&(host->k2), nsp_doubles);
     createAndZero((void**)&(host->k3), nsp_doubles);
     createAndZero((void**)&(host->k4), nsp_doubles);
     createAndZero((void**)&(host->k5), nsp_doubles);
     createAndZero((void**)&(host->k6), nsp_doubles);
     createAndZero((void**)&(host->k7), nsp_doubles);

     //copy host struct (now holding device pointers) to device
     cudaErrorCheck( cudaMemcpy(*d_mem, host, sizeof(solver_memory), cudaMemcpyHostToDevice) );
 }

  /**
   * \brief Returns the identifier string of this solver.
   *
   * \return Pointer to the string literal "exp4-int-gpu".
   */
  const char* solver_name() {
     return "exp4-int-gpu";
  }

 #ifdef LOG_OUTPUT
     //make logging array definitions
     //(everything in this section exists only when LOG_OUTPUT is defined)

     //device-side log arrays, MAX_STEPS entries each; solver_log() copies them
     //back to the matching *_host arrays below via cudaMemcpyFromSymbol
     __device__ double err_log[MAX_STEPS];   // written as the 3rd output column (presumably the error estimate)
     __device__ int m_log[MAX_STEPS];        // written as the 4th output column
     __device__ int m1_log[MAX_STEPS];       // written as the 5th output column
     __device__ int m2_log[MAX_STEPS];       // written as the 6th output column
     __device__ double t_log[MAX_STEPS];     // written as the 1st output column (presumably the time)
     __device__ double h_log[MAX_STEPS];     // written as the 2nd output column (presumably the step size)
     __device__ bool reject_log[MAX_STEPS];  // true -> the entry is written to rFile, false -> to logFile
     //number of entries solver_log() copies back and prints; solver_log() exits when it reads -1
     __device__ int num_integrator_steps;

     //host-side mirrors filled by solver_log()
     double err_log_host[MAX_STEPS];
     int m_log_host[MAX_STEPS];
     int m1_log_host[MAX_STEPS];
     int m2_log_host[MAX_STEPS];
     double t_log_host[MAX_STEPS];
     double h_log_host[MAX_STEPS];
     bool reject_log_host[MAX_STEPS];
     int num_integrator_steps_host;

     //output files: opened in init_solver_log(), closed in cleanup_solver()
     FILE* logFile = 0;   // "log/<solver name>-kry-log.txt"
     FILE* rFile = 0;     // "log/<solver name>-kry-reject.txt"
 #endif


  /**
   * \brief Copies the device-side step logs to the host and appends them to the log files.
   *
   * Does nothing unless LOG_OUTPUT is defined. Entries whose reject flag is set
   * are written to rFile, all others to logFile; each line holds
   * t, h, err, m, m1 and m2 separated by tabs.
   *
   * The process exits with status -1 when the device step counter is outside
   * [0, MAX_STEPS] (this includes the -1 value the original code tested for).
   * When the log files are not open the call prints a warning and returns
   * instead of passing a null FILE* to fprintf.
   */
  void solver_log() {
  #ifdef LOG_OUTPUT
     //first copy back num steps to make sure we're inbounds
     cudaErrorCheck( cudaMemcpyFromSymbol(&num_integrator_steps_host, num_integrator_steps, sizeof(int)) );
     if (num_integrator_steps_host < 0 || num_integrator_steps_host > MAX_STEPS)
     {
         //the count is used as a copy length and loop bound below, so an
         //out-of-range value must never reach the host arrays
         fprintf(stderr, "solver_log: invalid number of logged steps (%d), aborting\n", num_integrator_steps_host);
         exit(-1);
     }
     if (logFile == 0 || rFile == 0)
     {
         //init_solver_log() was not called or could not open the files
         fprintf(stderr, "solver_log: log files are not open, skipping log output\n");
         return;
     }

     //otherwise copy back
     const size_t n_steps = (size_t)num_integrator_steps_host;
     cudaErrorCheck( cudaMemcpyFromSymbol(err_log_host, err_log, n_steps * sizeof(double)) );
     cudaErrorCheck( cudaMemcpyFromSymbol(m_log_host, m_log, n_steps * sizeof(int)) );
     cudaErrorCheck( cudaMemcpyFromSymbol(m1_log_host, m1_log, n_steps * sizeof(int)) );
     cudaErrorCheck( cudaMemcpyFromSymbol(m2_log_host, m2_log, n_steps * sizeof(int)) );
     cudaErrorCheck( cudaMemcpyFromSymbol(t_log_host, t_log, n_steps * sizeof(double)) );
     cudaErrorCheck( cudaMemcpyFromSymbol(h_log_host, h_log, n_steps * sizeof(double)) );
     cudaErrorCheck( cudaMemcpyFromSymbol(reject_log_host, reject_log, n_steps * sizeof(bool)) );

     //and print
     for (int i = 0; i < num_integrator_steps_host; ++i)
     {
         //rejected steps go to their own file
         FILE* out = reject_log_host[i] ? rFile : logFile;
         fprintf(out, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t_log_host[i], h_log_host[i], err_log_host[i], m_log_host[i], m1_log_host[i], m2_log_host[i]);
     }
  #endif
  }

  /**
   * \brief Opens (and truncates) the two solver log files.
   *
   * Does nothing unless LOG_OUTPUT is defined. The files are
   * "log/<solver name>-kry-log.txt" (stored in logFile) and
   * "log/<solver name>-kry-reject.txt" (stored in rFile).
   * A failure to open either file is reported on stderr; the corresponding
   * handle is then left null.
   */
  void init_solver_log() {
  #ifdef LOG_OUTPUT
     const char* f_name = solver_name();

     //fixed-size buffers with snprintf replace the previous variable-length
     //arrays (not standard C++) and unbounded sprintf calls
     char out_name[FILENAME_MAX];

     //file for krylov logging
     //open and clear
     snprintf(out_name, sizeof(out_name), "log/%s-kry-log.txt", f_name);
     logFile = fopen(out_name, "w");
     if (logFile == 0)
     {
         //e.g. the log/ directory does not exist
         perror(out_name);
     }

     //file for rejected krylov steps
     //open and clear
     snprintf(out_name, sizeof(out_name), "log/%s-kry-reject.txt", f_name);
     rFile = fopen(out_name, "w");
     if (rFile == 0)
     {
         perror(out_name);
     }
  #endif
  }

   /**
    * \brief Returns the number of bytes of solver work memory needed per CUDA thread.
    *
    * The total mirrors the per-thread arrays allocated in initialize_solver().
    * The sc array (NSP doubles per thread) was previously left out of the sum
    * even though initialize_solver() allocates it, so the value was too small.
    *
    * \return Size in bytes for a single thread.
    */
   size_t required_solver_size() {
     //return the size (in bytes), needed per cuda thread
     size_t num_bytes = 0;
     //sc array
     num_bytes += NSP;
     //three work arrays
     num_bytes += 3 * STRIDE;
     //Hm, phiHm
     num_bytes += 2 * STRIDE * STRIDE;
     //Vm
     num_bytes += NSP * STRIDE;
     //7 k arrays
     num_bytes += 7 * NSP;
     //add all doubles
     num_bytes *= sizeof(double);
     //one pivot array
     num_bytes += STRIDE * sizeof(int);
     //complex inverse
     num_bytes += STRIDE * STRIDE * sizeof(cuDoubleComplex);
     //complex work array
     num_bytes += STRIDE * sizeof(cuDoubleComplex);
     //result flag
     num_bytes += 1 * sizeof(int);

     return num_bytes;
   }

  /**
   * \brief Releases everything initialize_solver() allocated and closes the log files.
   *
   * \param[in] h_mem  Host-side solver_memory struct whose members hold the device pointers to free.
   * \param[in] d_mem  Device copy of the struct; the allocation itself is freed.
   *
   * With LOG_OUTPUT defined, the log files are closed only if they are open, so
   * calling this without a successful init_solver_log() no longer passes a
   * null FILE* to fclose.
   */
  void cleanup_solver(solver_memory** h_mem, solver_memory** d_mem) {
  #ifdef LOG_OUTPUT
     //close files (reset the handles so a later call cannot close them twice)
     if (rFile != 0)
     {
         fclose(rFile);
         rFile = 0;
     }
     if (logFile != 0)
     {
         fclose(logFile);
         logFile = 0;
     }
  #endif
     solver_memory* host = *h_mem;
     //free the device arrays in the order they were allocated
     cudaErrorCheck( cudaFree(host->sc) );
     cudaErrorCheck( cudaFree(host->work1) );
     cudaErrorCheck( cudaFree(host->work2) );
     cudaErrorCheck( cudaFree(host->work3) );
     cudaErrorCheck( cudaFree(host->work4) );
     cudaErrorCheck( cudaFree(host->Hm) );
     cudaErrorCheck( cudaFree(host->phiHm) );
     cudaErrorCheck( cudaFree(host->Vm) );
     cudaErrorCheck( cudaFree(host->ipiv) );
     cudaErrorCheck( cudaFree(host->invA) );
     cudaErrorCheck( cudaFree(host->result) );
     cudaErrorCheck( cudaFree(host->k1) );
     cudaErrorCheck( cudaFree(host->k2) );
     cudaErrorCheck( cudaFree(host->k3) );
     cudaErrorCheck( cudaFree(host->k4) );
     cudaErrorCheck( cudaFree(host->k5) );
     cudaErrorCheck( cudaFree(host->k6) );
     cudaErrorCheck( cudaFree(host->k7) );
     //finally the device copy of the struct itself
     cudaErrorCheck( cudaFree(*d_mem) );
  }

 #ifdef GENERATE_DOCS
 }
 #endif
exp4cu::cleanup_solver
void cleanup_solver(solver_memory **h_mem, solver_memory **d_mem)
Cleans up solver memory.
Definition: exp4_init.cu:200

exp4cu::initialize_solver
void initialize_solver(int padded, solver_memory **h_mem, solver_memory **d_mem)
Initializes the GPU solver.
Definition: exp4_init.cu:39

gpu_macros.cuh
Defines some simple macros to simplify GPU indexing.

exp4cu::required_solver_size
size_t required_solver_size()
Returns the total size (in bytes) required for memory storage for a single GPU thread. Used in calcula...
Definition: exp4_init.cu:167

genericcu::padded
int padded
Padded # of ODEs to solve.
Definition: solver_interface.cu:18

NSP
#define NSP
The IVP system size.
Definition: header.cuh:20

STRIDE
#define STRIDE
the matrix dimensions
Definition: radau2a_props.cuh:20

MAX_STEPS
#define MAX_STEPS
Maximum allowed internal timesteps per integration step.
Definition: exp4_props.cuh:30

exp4cu::solver_name
const char * solver_name()
Returns a descriptive solver name.
Definition: exp4_init.cu:71

exp4cu::solver_memory
Structure containing memory needed for EXP4 algorithm.
Definition: exp4_props.cuh:37

exp4cu::createAndZero
void createAndZero(void **ptr, size_t size)
Convenience method to cudaMalloc a buffer and memset it to zero.
Definition: exp4_init.cu:25

rational_approximant.cuh
The generic initialization file for poles/hosts for RA-based evaluation of the matrix exponential...

solver_props.cuh
simple convenience file to include the correct solver properties file

exp4cu
Definition: exp4.cu:41

find_poles_and_residuals
void find_poles_and_residuals()
get poles and residues for rational approximant to matrix exponential
Definition: rational_approximant.c:23

exp4cu::solver_log
void solver_log()
Executes solver specific logging tasks.
Definition: exp4_init.cu:106

exp4cu::init_solver_log
void init_solver_log()
Initializes solver specific items for logging.
Definition: exp4_init.cu:142

cudaErrorCheck
#define cudaErrorCheck(ans)
Definition: gpu_macros.cuh:26

solver_options.cuh
A file generated by Scons that specifies various options to the solvers.

plotter.size
size
Definition: plotter.py:39