accelerInt  v0.1
exp4_init.cu
Go to the documentation of this file.
1 
10 #include "rational_approximant.cuh"
11 #include "solver_options.cuh"
12 #include "solver_props.cuh"
13 #include "gpu_macros.cuh"
14 
15 #ifdef GENERATE_DOCS
16 namespace exp4cu {
17 #endif
18 
25 void createAndZero(void** ptr, size_t size)
26 {
27  cudaErrorCheck(cudaMalloc(ptr, size));
28  cudaErrorCheck(cudaMemset(*ptr, 0, size));
29 }
30 
39 void initialize_solver(int padded, solver_memory** h_mem, solver_memory** d_mem) {
41  // Allocate storage for the device struct
42  cudaErrorCheck( cudaMalloc(d_mem, sizeof(solver_memory)) );
43  //allocate the device arrays on the host pointer
44  createAndZero((void**)&((*h_mem)->sc), NSP * padded * sizeof(double));
45  createAndZero((void**)&((*h_mem)->work1), STRIDE * padded * sizeof(double));
46  createAndZero((void**)&((*h_mem)->work2), STRIDE * padded * sizeof(double));
47  createAndZero((void**)&((*h_mem)->work3), STRIDE * padded * sizeof(double));
48  createAndZero((void**)&((*h_mem)->work4), STRIDE * padded * sizeof(cuDoubleComplex));
49  createAndZero((void**)&((*h_mem)->Hm), STRIDE * STRIDE * padded * sizeof(double));
50  createAndZero((void**)&((*h_mem)->phiHm), STRIDE * STRIDE * padded * sizeof(double));
51  createAndZero((void**)&((*h_mem)->Vm), NSP * STRIDE * padded * sizeof(double));
52  createAndZero((void**)&((*h_mem)->ipiv), NSP * padded * sizeof(int));
53  createAndZero((void**)&((*h_mem)->invA), STRIDE * STRIDE * padded * sizeof(cuDoubleComplex));
54  createAndZero((void**)&((*h_mem)->result), padded * sizeof(int));
55  createAndZero((void**)&((*h_mem)->k1), NSP * padded * sizeof(double));
56  createAndZero((void**)&((*h_mem)->k2), NSP * padded * sizeof(double));
57  createAndZero((void**)&((*h_mem)->k3), NSP * padded * sizeof(double));
58  createAndZero((void**)&((*h_mem)->k4), NSP * padded * sizeof(double));
59  createAndZero((void**)&((*h_mem)->k5), NSP * padded * sizeof(double));
60  createAndZero((void**)&((*h_mem)->k6), NSP * padded * sizeof(double));
61  createAndZero((void**)&((*h_mem)->k7), NSP * padded * sizeof(double));
62 
63  //copy host struct to device
64  cudaErrorCheck( cudaMemcpy(*d_mem, *h_mem, sizeof(solver_memory), cudaMemcpyHostToDevice) );
65  }
66 
71  const char* solver_name() {
72  const char* name = "exp4-int-gpu";
73  return name;
74  }
75 
76 #ifdef LOG_OUTPUT
77  //make logging array definitions
78  __device__ double err_log[MAX_STEPS];
79  __device__ int m_log[MAX_STEPS];
80  __device__ int m1_log[MAX_STEPS];
81  __device__ int m2_log[MAX_STEPS];
82  __device__ double t_log[MAX_STEPS];
83  __device__ double h_log[MAX_STEPS];
84  __device__ bool reject_log[MAX_STEPS];
85  __device__ int num_integrator_steps;
86  double err_log_host[MAX_STEPS];
87  int m_log_host[MAX_STEPS];
88  int m1_log_host[MAX_STEPS];
89  int m2_log_host[MAX_STEPS];
90  double t_log_host[MAX_STEPS];
91  double h_log_host[MAX_STEPS];
92  bool reject_log_host[MAX_STEPS];
93  int num_integrator_steps_host;
94  FILE* logFile = 0;
95  FILE* rFile = 0;
96 #endif
97 
98 
106  void solver_log() {
107  #ifdef LOG_OUTPUT
108  //first copy back num steps to make sure we're inbounds
109  cudaErrorCheck( cudaMemcpyFromSymbol(&num_integrator_steps_host, num_integrator_steps, sizeof(int)) );
110  if (num_integrator_steps_host == -1)
111  exit(-1);
112  //otherwise copy back
113  cudaErrorCheck( cudaMemcpyFromSymbol(err_log_host, err_log, num_integrator_steps_host * sizeof(double)) );
114  cudaErrorCheck( cudaMemcpyFromSymbol(m_log_host, m_log, num_integrator_steps_host * sizeof(int)) );
115  cudaErrorCheck( cudaMemcpyFromSymbol(m1_log_host, m1_log, num_integrator_steps_host * sizeof(int)) );
116  cudaErrorCheck( cudaMemcpyFromSymbol(m2_log_host, m2_log, num_integrator_steps_host * sizeof(int)) );
117  cudaErrorCheck( cudaMemcpyFromSymbol(t_log_host, t_log, num_integrator_steps_host * sizeof(double)) );
118  cudaErrorCheck( cudaMemcpyFromSymbol(h_log_host, h_log, num_integrator_steps_host * sizeof(double)) );
119  cudaErrorCheck( cudaMemcpyFromSymbol(reject_log_host, reject_log, num_integrator_steps_host * sizeof(bool)) );
120  //and print
121  for (int i = 0; i < num_integrator_steps_host; ++i)
122  {
123  if (reject_log_host[i])
124  {
125  fprintf(rFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t_log_host[i], h_log_host[i], err_log_host[i], m_log_host[i], m1_log_host[i], m2_log_host[i]);
126  }
127  else
128  {
129  fprintf(logFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t_log_host[i], h_log_host[i], err_log_host[i], m_log_host[i], m1_log_host[i], m2_log_host[i]);
130  }
131  }
132  #endif
133  }
134 
143  #ifdef LOG_OUTPUT
144  //file for krylov logging
145  //open and clear
146  const char* f_name = solver_name();
147  int len = strlen(f_name);
148  char out_name[len + 17];
149  sprintf(out_name, "log/%s-kry-log.txt", f_name);
150  logFile = fopen(out_name, "w");
151 
152  char out_reject_name[len + 23];
153  sprintf(out_reject_name, "log/%s-kry-reject.txt", f_name);
154  //file for krylov logging
155  //open and clear
156  rFile = fopen(out_reject_name, "w");
157  #endif
158  }
159 
168  //return the size (in bytes), needed per cuda thread
169  size_t num_bytes = 0;
170  //three work arrays
171  num_bytes += 3 * STRIDE;
172  //Hm, phiHm
173  num_bytes += 2 * STRIDE * STRIDE;
174  //Vm
175  num_bytes += NSP * STRIDE;
176  //7 k arrays
177  num_bytes += 7 * NSP;
178  //add all doubles
179  num_bytes *= sizeof(double);
180  //one pivot array
181  num_bytes += STRIDE * sizeof(int);
182  //complex inverse
183  num_bytes += STRIDE * STRIDE * sizeof(cuDoubleComplex);
184  //complex work array
185  num_bytes += STRIDE * sizeof(cuDoubleComplex);
186  //result flag
187  num_bytes += 1 * sizeof(int);
188 
189  return num_bytes;
190  }
191 
200  void cleanup_solver(solver_memory** h_mem, solver_memory** d_mem) {
201  #ifdef LOG_OUTPUT
202  //close files
203  fclose(rFile);
204  fclose(logFile);
205  #endif
206  cudaErrorCheck( cudaFree((*h_mem)->sc) );
207  cudaErrorCheck( cudaFree((*h_mem)->work1) );
208  cudaErrorCheck( cudaFree((*h_mem)->work2) );
209  cudaErrorCheck( cudaFree((*h_mem)->work3) );
210  cudaErrorCheck( cudaFree((*h_mem)->work4) );
211  cudaErrorCheck( cudaFree((*h_mem)->Hm) );
212  cudaErrorCheck( cudaFree((*h_mem)->phiHm) );
213  cudaErrorCheck( cudaFree((*h_mem)->Vm) );
214  cudaErrorCheck( cudaFree((*h_mem)->ipiv) );
215  cudaErrorCheck( cudaFree((*h_mem)->invA) );
216  cudaErrorCheck( cudaFree((*h_mem)->result) );
217  cudaErrorCheck( cudaFree((*h_mem)->k1) );
218  cudaErrorCheck( cudaFree((*h_mem)->k2) );
219  cudaErrorCheck( cudaFree((*h_mem)->k3) );
220  cudaErrorCheck( cudaFree((*h_mem)->k4) );
221  cudaErrorCheck( cudaFree((*h_mem)->k5) );
222  cudaErrorCheck( cudaFree((*h_mem)->k6) );
223  cudaErrorCheck( cudaFree((*h_mem)->k7) );
224  cudaErrorCheck( cudaFree(*d_mem) );
225  }
226 
227 #ifdef GENERATE_DOCS
228 }
229 #endif
void cleanup_solver(solver_memory **h_mem, solver_memory **d_mem)
Cleans up solver memory.
Definition: exp4_init.cu:200
void initialize_solver(int padded, solver_memory **h_mem, solver_memory **d_mem)
Initializes the GPU solver.
Definition: exp4_init.cu:39
Defines some simple macros to simplify GPU indexing.
size_t required_solver_size()
Returns the total size (in bytes) required for memory storage for a single GPU thread Used in calcula...
Definition: exp4_init.cu:167
int padded
Padded # of ODEs to solve.
#define NSP
The IVP system size.
Definition: header.cuh:20
#define STRIDE
the matrix dimensions
#define MAX_STEPS
Maximum allowed internal timesteps per integration step.
Definition: exp4_props.cuh:30
const char * solver_name()
Returns a descriptive solver name.
Definition: exp4_init.cu:71
Structure containing memory needed for EXP4 algorithm.
Definition: exp4_props.cuh:37
void createAndZero(void **ptr, size_t size)
Convienvience method to Cuda Malloc and memset a pointer to zero.
Definition: exp4_init.cu:25
The generic initialization file for poles/hosts for RA based evaulation of the matrix exponential...
simple convenience file to include the correct solver properties file
Definition: exp4.cu:41
void find_poles_and_residuals()
get poles and residues for rational approximant to matrix exponential
void solver_log()
Executes solver specific logging tasks.
Definition: exp4_init.cu:106
void init_solver_log()
Initializes solver specific items for logging.
Definition: exp4_init.cu:142
#define cudaErrorCheck(ans)
Definition: gpu_macros.cuh:26
A file generated by Scons that specifies various options to the solvers.