accelerInt  v0.1
exprb43.c
Go to the documentation of this file.
1 
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <math.h>
21 #include <stdbool.h>
22 #include <string.h>
23 
24 #include "header.h"
25 #include "dydt.h"
26 #include "jacob.h"
27 #include "exprb43_props.h"
28 #include "arnoldi.h"
30 #include "solver_init.h"
31 
32 #ifdef GENERATE_DOCS
33 namespace exprb43 {
34 #endif
35 
37 
47 int integrate (const double t_start, const double t_end, const double pr, double* y) {
48 
49  //initial time
50 #ifdef CONST_TIME_STEP
51  double h = t_end - t_start;
52 #else
53  double h = fmin(1.0e-8, t_end - t_start);
54 #endif
55  double h_new;
56 
57  double err_old = 1.0;
58  double h_old = h;
59 
60  bool reject = false;
61  int failures = 0;
62  int steps = 0;
63 
64  double t = t_start;
65 
66  // get scaling for weighted norm
67  double sc[NSP];
68  scale_init(y, sc);
69 
70  #ifdef LOG_KRYLOV_AND_STEPSIZES
71  //file for krylov logging
72  FILE *logFile;
73  //open and clear
74  const char* f_name = solver_name();
75  int len = strlen(f_name);
76  char out_name[len + 17];
77  sprintf(out_name, "log/%s-kry-log.txt", f_name);
78  logFile = fopen(out_name, "a");
79 
80  char out_reject_name[len + 23];
81  sprintf(out_reject_name, "log/%s-kry-reject.txt", f_name);
82  //file for krylov logging
83  FILE *rFile;
84  //open and clear
85  rFile = fopen(out_reject_name, "a");
86  #endif
87 
88  double beta = 0;
89 
90  // temporary arrays
91  double temp[NSP];
92  double f_temp[NSP];
93  double y1[NSP];
94 
95  // source vector
96  double fy[NSP];
97 
98  // Jacobian matrix
99  double A[NSP * NSP] = {0.0};
100  double gy[NSP];
101 
102  double Hm[STRIDE * STRIDE] = {0.0};
103  double Vm[NSP * STRIDE];
104  double phiHm[STRIDE * STRIDE];
105  double err = 0.0;
106  double savedActions[NSP * 5];
107  int numSteps = 0;
108  while ((t < t_end) && (t + h > t)) {
109 
110  //error checking
111  if (failures >= 5)
112  {
113  return EC_consecutive_steps;
114  }
115  if (steps++ >= MAX_STEPS)
116  {
117  return EC_max_steps_exceeded;
118  }
119  if (t + h <= t)
120  {
121  return EC_h_plus_t_equals_h;
122  }
123 
124  if (!reject) {
125  dydt (t, pr, y, fy);
126  eval_jacob (t, pr, y, A);
127  //gy = fy - A * y
128  sparse_multiplier(A, y, gy);
129 
130  for (int i = 0; i < NSP; ++i) {
131  gy[i] = fy[i] - gy[i];
132  }
133  }
134 
135  //do arnoldi
136  int m = arnoldi(0.5, 1, h, A, fy, sc, &beta, Vm, Hm, phiHm);
137  if (m + 1 >= STRIDE || m < 0)
138  {
139  //need to reduce h and try again
140  h /= 5.0;
141  failures++;
142  reject = true;
143  continue;
144  }
145 
146  // Un2 to be stored in temp
147  //Un2 is partially in the mth column of phiHm
148  //Un2 = y + ** 0.5 * h * phi_1(0.5 * h * A)*fy **
149  //Un2 = y + ** beta * Vm * phiHm(:, m) **
150 
151  //store h * beta * Vm * phi_1(h * Hm) * e1 in savedActions
152  matvec_m_by_m_plusequal(m, phiHm, &phiHm[m * STRIDE], temp);
153  matvec_n_by_m_scale(m, beta, Vm, temp, savedActions);
154 
155  //store 0.5 * h * beta * Vm * phi_1(0.5 * h * Hm) * fy + y in temp
156  matvec_n_by_m_scale_add(m, beta, Vm, &phiHm[m * STRIDE], temp, y);
157  //temp is now equal to Un2
158 
159  //next compute Dn2
160  //Dn2 = (F(Un2) - Jn * Un2) - gy
161 
162  dydt(t, pr, temp, &savedActions[NSP]);
163  sparse_multiplier(A, temp, f_temp);
164 
165 
166  for (int i = 0; i < NSP; ++i) {
167  temp[i] = savedActions[NSP + i] - f_temp[i] - gy[i];
168  }
169  //temp is now equal to Dn2
170 
171  //partially compute Un3 as:
172  //Un3 = y + ** h * phi_1(hA) * fy ** + h * phi_1(hA) * Dn2
173  //Un3 = y + ** h * beta * Vm * phiHm(:, m) **
174 
175  //now we need the action of the exponential on Dn2
176  int m1 = arnoldi(1.0, 4, h, A, temp, sc, &beta, Vm, Hm, phiHm);
177  if (m1 + 4 >= STRIDE || m1 < 0)
178  {
179  //need to reduce h and try again
180  h /= 5.0;
181  failures++;
182  reject = true;
183  continue;
184  }
185 
186  //save Phi3(h * A) * Dn2 to savedActions[0]
187  //save Phi4(h * A) * Dn2 to savedActions[NSP]
188  //add the action of phi_1 on Dn2 to y and hn * phi_1(hA) * fy to get Un3
189  const double* in[5] = {&phiHm[(m1 + 2) * STRIDE], &phiHm[(m1 + 3) * STRIDE], &phiHm[m1 * STRIDE], savedActions, y};
190  double* out[3] = {&savedActions[NSP], &savedActions[2 * NSP], temp};
191  double scale_vec[3] = {beta / (h * h), beta / (h * h * h), beta};
192  matvec_n_by_m_scale_special(m1, scale_vec, Vm, in, out);
193  //Un3 is now in temp
194 
195  //next compute Dn3
196  //Dn3 = F(Un3) - A * Un3 - gy
197  dydt(t, pr, temp, &savedActions[3 * NSP]);
198  sparse_multiplier(A, temp, f_temp);
199 
200 
201  for (int i = 0; i < NSP; ++i) {
202  temp[i] = savedActions[3 * NSP + i] - f_temp[i] - gy[i];
203  }
204  //temp is now equal to Dn3
205 
206  //finally we need the action of the exponential on Dn3
207  int m2 = arnoldi(1.0, 4, h, A, temp, sc, &beta, Vm, Hm, phiHm);
208  if (m2 + 4 >= STRIDE || m2 < 0)
209  {
210  //need to reduce h and try again
211  h /= 5.0;
212  failures++;
213  reject = true;
214  continue;
215  }
216 
217  out[0] = &savedActions[3 * NSP];
218  out[1] = &savedActions[4 * NSP];
219  in[0] = &phiHm[(m2 + 2) * STRIDE];
220  in[1] = &phiHm[(m2 + 3) * STRIDE];
221  scale_vec[0] = beta / (h * h);
222  scale_vec[1] = beta / (h * h * h);
223  matvec_n_by_m_scale_special2(m2, scale_vec, Vm, in, out);
224 
225  //construct y1 and error vector
226 
227  for (int i = 0; i < NSP; ++i) {
228  //y1 = y + h * phi1(h * A) * fy + h * sum(bi * Dni)
229  y1[i] = y[i] + savedActions[i] + 16.0 * savedActions[NSP + i] - 48.0 * savedActions[2 * NSP + i] + -2.0 * savedActions[3 * NSP + i] + 12.0 * savedActions[4 * NSP + i];
230  //error vec
231  temp[i] = 48.0 * savedActions[2 * NSP + i] - 12.0 * savedActions[4 * NSP + i];
232  }
233 
234 
235 #ifndef CONST_TIME_STEP
236  //scale and find err
237  scale (y, y1, f_temp);
238  err = fmax(EPS, sc_norm(temp, f_temp));
239 
240  // classical step size calculation
241  h_new = pow(err, -1.0 / ORD);
242 
243  failures = 0;
244  if (err <= 1.0) {
245 
246  #ifdef LOG_KRYLOV_AND_STEPSIZES
247  fprintf (logFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t, h, err, m, m1, m2);
248  #endif
249 
250  memcpy(sc, f_temp, NSP * sizeof(double));
251 
252  // minimum of classical and Gustafsson step size prediction
253  h_new = fmin(h_new, (h / h_old) * pow((err_old / (err * err)), (1.0 / ORD)));
254 
255  // limit to 0.2 <= (h_new/8) <= 8.0
256  h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);
257 
258  // update y and t
259 
260  for (int i = 0; i < NSP; ++i) {
261  y[i] = y1[i];
262  }
263 
264  t += h;
265 
266  // store time step and error
267  err_old = fmax(1.0e-2, err);
268  h_old = h;
269 
270  // check if last step rejected
271  if (reject) {
272  reject = false;
273  h_new = fmin(h, h_new);
274  }
275  h = fmin(h_new, t_end - t);
276  numSteps++;
277 
278  } else {
279 
280  #ifdef LOG_KRYLOV_AND_STEPSIZES
281  fprintf (rFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t, h, err, m, m1, m2);
282  #endif
283 
284  // limit to 0.2 <= (h_new/8) <= 8.0
285  h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);
286  h_new = fmin(h_new, t_end - t);
287 
288  reject = true;
289  h = fmin(h, h_new);
290  }
291 #else
292  //constant time stepping
293  // update y and t
294  for (int i = 0; i < NSP; ++i) {
295  y[i] = y1[i];
296  }
297 
298  t += h;
299 #endif
300 
301  } // end while
302 
303  #ifdef LOG_KRYLOV_AND_STEPSIZES
304  fclose(logFile);
305  fclose(rFile);
306  #endif
307 
308  return EC_success;
309 }
310 
311 #ifdef GENERATE_DOCS
312 }
313 #endif
Contains header definitions for the RHS function for the van der Pol example.
An example header file that defines system size and other required methods for integration of the van...
__device__ void scale_init(const double *__restrict__ y0, double *__restrict__ sc)
Get scaling for weighted norm for the initial timestep (used in krylov process)
#define EC_consecutive_steps
Maximum number of consecutive internal timesteps with error reached.
int integrate(const double t_start, const double t_end, const double pr, double *y)
4th-order exponential integrator function w/ adaptive Kyrlov subspace approximation ...
Definition: exprb43.c:47
void dydt(const double t, const double mu, const double *__restrict__ y, double *__restrict__ dy)
An implementation of the RHS of the van der Pol equation.
Definition: dydt.c:22
void eval_jacob(const double t, const double pres, const double *cy, double *jac)
Computes a finite difference Jacobian of order FD_ORD of the RHS function dydt at the given pressure ...
Definition: fd_jacob.c:24
#define NSP
The IVP system size.
Definition: header.cuh:20
#define STRIDE
the matrix dimensions
#define MAX_STEPS
Maximum allowed internal timesteps per integration step.
Definition: exp4_props.cuh:30
const char * solver_name()
Returns a descriptive solver name.
Definition: exprb43_init.c:57
__device__ void matvec_n_by_m_scale_special2(const int m, const double *__restrict__ scale, const double *__restrict__ A, double *const __restrict__ *V, double *__restrict__ *Av)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
#define EC_success
Successful time step.
__device__ void matvec_n_by_m_scale_add(const int m, const double scale, const double *__restrict__ A, const double *__restrict__ V, double *__restrict__ Av, const double *__restrict__ add)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
void sparse_multiplier(const double *A, const double *Vm, double *w)
Implements Jacobian \ vector multiplication in sparse (or unrolled) form.
Various macros controlling behaviour of EXPRB43 algorithm.
__device__ void scale(const double *__restrict__ y0, const double *__restrict__ y1, double *__restrict__ sc)
Get scaling for weighted norm.
__device__ void matvec_m_by_m_plusequal(const int m, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector plus equals for a matrix of size MxM and vector of size Mx1. That is, it returns (A + I) * v.
__device__ int arnoldi(const double scale, const int p, const double h, const double *__restrict__ A, const solver_memory *__restrict__ solver, const double *__restrict__ v, double *__restrict__ beta, double *__restrict__ work, cuDoubleComplex *__restrict__ work2)
Runs the arnoldi iteration to calculate the Krylov projection.
Definition: arnoldi.cuh:51
Contains a header definition for the van der Pol Jacobian evaluation.
#define ORD
order of embedded methods
Definition: exp4_props.cuh:24
__device__ void matvec_n_by_m_scale_special(const int m, const double *__restrict__ scale, const double *__restrict__ A, double *const __restrict__ *V, double *__restrict__ *Av)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
#define EC_max_steps_exceeded
Maximum number of internal timesteps exceeded.
__device__ double sc_norm(const double *__restrict__ nums, const double *__restrict__ sc)
Perform weighted norm.
__device__ void matvec_n_by_m_scale(const int m, const double scale, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
Header definitions for solver initialization routins.
Implementation of the arnoldi iteration methods.
#define EPS
Implementation of various linear algebra functions needed in the exponential integrators.
#define EC_h_plus_t_equals_h
Timescale reduced such that t + h == t in floating point math.