accelerInt  v0.1
exprb43_init.cu
Go to the documentation of this file.
1 
10 #include "rational_approximant.cuh"
11 #include "solver_options.cuh"
12 #include "solver_props.cuh"
13 #include "gpu_macros.cuh"
14 
15 #ifdef GENERATE_DOCS
16 namespace exprb43cu {
17 #endif
18 
23  const char* solver_name() {
24  const char* name = "exprb43-int-gpu";
25  return name;
26  }
27 
28 #ifdef LOG_OUTPUT
29  //make logging array definitions
30  __device__ double err_log[MAX_STEPS];
31  __device__ int m_log[MAX_STEPS];
32  __device__ int m1_log[MAX_STEPS];
33  __device__ int m2_log[MAX_STEPS];
34  __device__ double t_log[MAX_STEPS];
35  __device__ double h_log[MAX_STEPS];
36  __device__ bool reject_log[MAX_STEPS];
37  __device__ int num_integrator_steps;
38  double err_log_host[MAX_STEPS];
39  int m_log_host[MAX_STEPS];
40  int m1_log_host[MAX_STEPS];
41  int m2_log_host[MAX_STEPS];
42  double t_log_host[MAX_STEPS];
43  double h_log_host[MAX_STEPS];
44  bool reject_log_host[MAX_STEPS];
45  int num_integrator_steps_host;
46  FILE* logFile = 0;
47  FILE* rFile = 0;
48 #endif
49 
57  void solver_log() {
58  #ifdef LOG_OUTPUT
59  //first copy back num steps to make sure we're inbounds
60  cudaErrorCheck( cudaMemcpyFromSymbol(&num_integrator_steps_host, num_integrator_steps, sizeof(int)) );
61  if (num_integrator_steps_host == -1)
62  exit(-1);
63  //otherwise copy back
64  cudaErrorCheck( cudaMemcpyFromSymbol(err_log_host, err_log, num_integrator_steps_host * sizeof(double)) );
65  cudaErrorCheck( cudaMemcpyFromSymbol(m_log_host, m_log, num_integrator_steps_host * sizeof(int)) );
66  cudaErrorCheck( cudaMemcpyFromSymbol(m1_log_host, m1_log, num_integrator_steps_host * sizeof(int)) );
67  cudaErrorCheck( cudaMemcpyFromSymbol(m2_log_host, m2_log, num_integrator_steps_host * sizeof(int)) );
68  cudaErrorCheck( cudaMemcpyFromSymbol(t_log_host, t_log, num_integrator_steps_host * sizeof(double)) );
69  cudaErrorCheck( cudaMemcpyFromSymbol(h_log_host, h_log, num_integrator_steps_host * sizeof(double)) );
70  cudaErrorCheck( cudaMemcpyFromSymbol(reject_log_host, reject_log, num_integrator_steps_host * sizeof(bool)) );
71  //and print
72  for (int i = 0; i < num_integrator_steps_host; ++i)
73  {
74  if (reject_log_host[i])
75  {
76  fprintf(rFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t_log_host[i], h_log_host[i], err_log_host[i], m_log_host[i], m1_log_host[i], m2_log_host[i]);
77  }
78  else
79  {
80  fprintf(logFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t_log_host[i], h_log_host[i], err_log_host[i], m_log_host[i], m1_log_host[i], m2_log_host[i]);
81  }
82  }
83  #endif
84  }
85 
93  void init_solver_log() {
94  #ifdef LOG_OUTPUT
95  //file for krylov logging
96  //open and clear
97  const char* f_name = solver_name();
98  int len = strlen(f_name);
99  char out_name[len + 17];
100  sprintf(out_name, "log/%s-kry-log.txt", f_name);
101  logFile = fopen(out_name, "w");
102 
103  char out_reject_name[len + 23];
104  sprintf(out_reject_name, "log/%s-kry-reject.txt", f_name);
105  //file for krylov logging
106  //open and clear
107  rFile = fopen(out_reject_name, "w");
108  #endif
109  }
110 
119  //return the size (in bytes), needed per cuda thread
120  size_t num_bytes = 0;
121  //scale array
122  num_bytes += NSP * sizeof(double);
123  //three work arrays
124  num_bytes += 3 * STRIDE * sizeof(double);
125  //gy
126  num_bytes += NSP * sizeof(double);
127  //Hm, phiHm
128  num_bytes += 2 * STRIDE * STRIDE * sizeof(double);
129  //Vm
130  num_bytes += NSP * STRIDE * sizeof(double);
131  //saved actions
132  num_bytes += 5 * NSP * sizeof(double);
133  //one pivot array
134  num_bytes += STRIDE * sizeof(int);
135  //complex inverse
136  num_bytes += STRIDE * STRIDE * sizeof(cuDoubleComplex);
137  //complex work array
138  num_bytes += STRIDE * sizeof(cuDoubleComplex);
139  //result flag
140  num_bytes += 1 * sizeof(int);
141 
142  return num_bytes;
143  }
144 
151 void createAndZero(void** ptr, size_t size)
152 {
153  cudaErrorCheck(cudaMalloc(ptr, size));
154  cudaErrorCheck(cudaMemset(*ptr, 0, size));
155 }
156 
167  // Allocate storage for the device struct
168  cudaErrorCheck( cudaMalloc(d_mem, sizeof(solver_memory)) );
169  //allocate the device arrays on the host pointer
170  createAndZero((void**)&((*h_mem)->sc), NSP * padded * sizeof(double));
171  createAndZero((void**)&((*h_mem)->work1), STRIDE * padded * sizeof(double));
172  createAndZero((void**)&((*h_mem)->work2), STRIDE * padded * sizeof(double));
173  createAndZero((void**)&((*h_mem)->work3), STRIDE * padded * sizeof(double));
174  createAndZero((void**)&((*h_mem)->gy), NSP * padded * sizeof(double));
175  createAndZero((void**)&((*h_mem)->Hm), STRIDE * STRIDE * padded * sizeof(double));
176  createAndZero((void**)&((*h_mem)->phiHm), STRIDE * STRIDE * padded * sizeof(double));
177  createAndZero((void**)&((*h_mem)->Vm), NSP * STRIDE * padded * sizeof(double));
178  createAndZero((void**)&((*h_mem)->savedActions), 5 * NSP * padded * sizeof(double));
179  createAndZero((void**)&((*h_mem)->ipiv), NSP * padded * sizeof(int));
180  createAndZero((void**)&((*h_mem)->invA), STRIDE * STRIDE * padded * sizeof(cuDoubleComplex));
181  createAndZero((void**)&((*h_mem)->work4), STRIDE * padded * sizeof(cuDoubleComplex));
182  createAndZero((void**)&((*h_mem)->result), padded * sizeof(int));
183 
184  //copy host struct to device
185  cudaErrorCheck( cudaMemcpy(*d_mem, *h_mem, sizeof(solver_memory), cudaMemcpyHostToDevice) );
186 }
187 
196  void cleanup_solver(solver_memory** h_mem, solver_memory** d_mem) {
197  #ifdef LOG_OUTPUT
198  //close files
199  fclose(rFile);
200  fclose(logFile);
201  #endif
202  cudaErrorCheck( cudaFree((*h_mem)->sc) );
203  cudaErrorCheck( cudaFree((*h_mem)->work1) );
204  cudaErrorCheck( cudaFree((*h_mem)->work2) );
205  cudaErrorCheck( cudaFree((*h_mem)->work3) );
206  cudaErrorCheck( cudaFree((*h_mem)->gy) );
207  cudaErrorCheck( cudaFree((*h_mem)->Hm) );
208  cudaErrorCheck( cudaFree((*h_mem)->phiHm) );
209  cudaErrorCheck( cudaFree((*h_mem)->Vm) );
210  cudaErrorCheck( cudaFree((*h_mem)->savedActions) );
211  cudaErrorCheck( cudaFree((*h_mem)->ipiv) );
212  cudaErrorCheck( cudaFree((*h_mem)->invA) );
213  cudaErrorCheck( cudaFree((*h_mem)->work4) );
214  cudaErrorCheck( cudaFree((*h_mem)->result) );
215  cudaErrorCheck( cudaFree(*d_mem) );
216  }
217 
218 #ifdef GENERATE_DOCS
219 }
220 #endif
Defines some simple macros to simplify GPU indexing.
void createAndZero(void **ptr, size_t size)
Convienvience method to Cuda Malloc and memset a pointer to zero.
int padded
Padded # of ODEs to solve.
void initialize_solver(int padded, solver_memory **h_mem, solver_memory **d_mem)
Initializes the GPU solver.
#define NSP
The IVP system size.
Definition: header.cuh:20
void init_solver_log()
Initializes solver specific items for logging.
Definition: exprb43_init.cu:93
#define STRIDE
the matrix dimensions
#define MAX_STEPS
Maximum allowed internal timesteps per integration step.
Definition: exp4_props.cuh:30
The generic initialization file for poles/hosts for RA based evaulation of the matrix exponential...
simple convenience file to include the correct solver properties file
void find_poles_and_residuals()
get poles and residues for rational approximant to matrix exponential
void solver_log()
Executes solver specific logging tasks.
Definition: exprb43_init.cu:57
#define cudaErrorCheck(ans)
Definition: gpu_macros.cuh:26
const char * solver_name()
Returns a descriptive solver name.
Definition: exprb43_init.cu:23
void cleanup_solver(solver_memory **h_mem, solver_memory **d_mem)
Cleans up solver memory.
A file generated by Scons that specifies various options to the solvers.
size_t required_solver_size()
Returns the total size (in bytes) required for memory storage for a single GPU thread Used in calcula...