accelerInt  v0.1
exp4.c
Go to the documentation of this file.
1 
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <math.h>
19 #include <stdbool.h>
20 #include <string.h>
21 
22 #include "header.h"
23 #include "dydt.h"
24 #include "jacob.h"
25 #include "arnoldi.h"
26 #include "exp4_props.h"
28 #include "solver_init.h"
29 #include "sparse_multiplier.h"
30 
31 #ifdef GENERATE_DOCS
32 namespace exp4 {
33 #endif
34 
36 
46 int integrate (const double t_start, const double t_end, const double pr, double* y) {
47 
48  //initial time
49 #ifdef CONST_TIME_STEP
50  double h = t_end - t_start;
51 #else
52  double h = fmin(1.0e-8, t_end - t_start);
53 #endif
54  double h_new;
55 
56  double err_old = 1.0;
57  double h_old = h;
58 
59  bool reject = false;
60  int failures = 0;
61  int steps = 0;
62 
63  double t = t_start;
64 
65  // get scaling for weighted norm
66  double sc[NSP];
67  scale_init(y, sc);
68 
69 #ifdef LOG_KRYLOV_AND_STEPSIZES
70  //file for krylov logging
71  FILE *logFile;
72  //open and clear
73  const char* f_name = solver_name();
74  int len = strlen(f_name);
75  char out_name[len + 17];
76  sprintf(out_name, "log/%s-kry-log.txt", f_name);
77  logFile = fopen(out_name, "a");
78 
79  char out_reject_name[len + 23];
80  sprintf(out_reject_name, "log/%s-kry-reject.txt", f_name);
81  //file for krylov logging
82  FILE *rFile;
83  //open and clear
84  rFile = fopen(out_reject_name, "a");
85 #endif
86 
87  double beta = 0;
88  // source vector
89  double fy[NSP];
90  // Jacobian matrix
91  double A[NSP * NSP] = {0.0};
92 
93  // temporary arrays
94  double temp[NSP];
95  double f_temp[NSP];
96  double y1[NSP];
97  double Hm[STRIDE * STRIDE] = {0.0};
98  double Vm[NSP * STRIDE];
99  double phiHm[STRIDE * STRIDE];
100  double err = 0.0;
101 
102  // i-vectors
103  double k1[NSP];
104  double k2[NSP];
105  double k3[NSP];
106  double k4[NSP];
107  double k5[NSP];
108  double k6[NSP];
109  double k7[NSP];
110  //initial krylov subspace sizes
111  while ((t < t_end) && (t + h > t)) {
112 
113  //error checking
114  if (failures >= MAX_CONSECUTIVE_ERRORS)
115  {
116  return EC_consecutive_steps;
117  }
118  if (steps++ >= MAX_STEPS)
119  {
120  return EC_max_steps_exceeded;
121  }
122  if (t + h <= t)
123  {
124  return EC_h_plus_t_equals_h;
125  }
126 
127  if (!reject) {
128  dydt (t, pr, y, fy);
129  eval_jacob (t, pr, y, A);
130  }
131 
132  //do arnoldi
133  int m = arnoldi(1.0 / 3.0, P, h, A, fy, sc, &beta, Vm, Hm, phiHm);
134  if (m + P >= STRIDE || m < 0)
135  {
136  //need to reduce h and try again
137  h /= 5.0;
138  failures++;
139  reject = true;
140  continue;
141  }
142 
143  //k1 is partially in the first column of phiHm
144  //k1 = beta * Vm * phiHm(:, 1)
145  matvec_n_by_m_scale(m, beta, Vm, phiHm, k1);
146 
147  //k2
148  //computing phi(2h * A)
149  matvec_m_by_m (m, phiHm, phiHm, temp);
150  //note: f_temp will contain hm * phi * phi * e1 for later use
151  matvec_m_by_m (m, Hm, temp, f_temp);
152  matvec_n_by_m_scale_add(m, beta * (h / 6.0), Vm, f_temp, k2, k1);
153 
154  //k3
155  //use the stored hm * phi * phi * e1 to get phi(3h * A)
156  matvec_m_by_m (m, phiHm, f_temp, temp);
157  matvec_m_by_m (m, Hm, temp, f_temp);
158  matvec_n_by_m_scale_add_subtract(m, beta * (h * h / 27.0), Vm, f_temp, k3, k2, k1);
159 
160  // d4
161 
162  for (int i = 0; i < NSP; ++i) {
163  // f4
164  f_temp[i] = h * ((-7.0 / 300.0) * k1[i] + (97.0 / 150.0) * k2[i] - (37.0 / 300.0) * k3[i]);
165 
166  k4[i] = y[i] + f_temp[i];
167  }
168 
169  dydt (t, pr, k4, temp);
170  sparse_multiplier (A, f_temp, k4);
171 
172 
173  for (int i = 0; i < NSP; ++i) {
174  k4[i] = temp[i] - fy[i] - k4[i];
175  }
176 
177  //do arnoldi
178  int m1 = arnoldi(1.0 / 3.0, P, h, A, k4, sc, &beta, Vm, Hm, phiHm);
179  if (m1 + P >= STRIDE || m1 < 0)
180  {
181  //need to reduce h and try again
182  h /= 5.0;
183  failures++;
184  reject = true;
185  continue;
186  }
187  //k4 is partially in the m'th column of phiHm
188  matvec_n_by_m_scale(m1, beta, Vm, phiHm, k4);
189 
190  //k5
191  //computing phi(2h * A)
192  matvec_m_by_m (m1, phiHm, phiHm, temp);
193  //note: f_temp will contain hm * phi * phi * e1 for later use
194  matvec_m_by_m (m1, Hm, temp, f_temp);
195  matvec_n_by_m_scale_add(m1, beta * (h / 6.0), Vm, f_temp, k5, k4);
196 
197  // k6
198  //use the stored hm * phi * phi * e1 to get phi(3h * A)
199  matvec_m_by_m (m1, phiHm, f_temp, temp);
200  matvec_m_by_m (m1, Hm, temp, f_temp);
201  matvec_n_by_m_scale_add_subtract(m1, beta * (h * h / 27.0), Vm, f_temp, k6, k5, k4);
202 
203  // k7
204 
205  for (int i = 0; i < NSP; ++i) {
206  // f7
207  f_temp[i] = h * ((59.0 / 300.0) * k1[i] - (7.0 / 75.0) * k2[i] + (269.0 / 300.0) * k3[i] + (2.0 / 3.0) * (k4[i] + k5[i] + k6[i]));
208 
209  k7[i] = y[i] + f_temp[i];
210  }
211 
212  dydt (t, pr, k7, temp);
213  sparse_multiplier (A, f_temp, k7);
214 
215 
216  for (int i = 0; i < NSP; ++i) {
217  k7[i] = temp[i] - fy[i] - k7[i];
218  }
219 
220  int m2 = arnoldi(1.0 / 3.0, P, h, A, k7, sc, &beta, Vm, Hm, phiHm);
221  if (m2 + P >= STRIDE || m2 < 0)
222  {
223  //need to reduce h and try again
224  h /= 5.0;
225  failures++;
226  reject = true;
227  continue;
228  }
229  //k7 is partially in the m'th column of phiHm
230  matvec_n_by_m_scale(m2, beta / (h / 3.0), Vm, &phiHm[m2 * STRIDE], k7);
231 
232  // y_n+1
233 
234  for (int i = 0; i < NSP; ++i) {
235  y1[i] = y[i] + h * (k3[i] + k4[i] - (4.0 / 3.0) * k5[i] + k6[i] + (1.0 / 6.0) * k7[i]);
236  }
237 
238 #ifndef CONST_TIME_STEP
239  scale (y, y1, f_temp);
240 
242  // calculate errors
244 
245  // error of embedded order 3 method
246 
247  for (int i = 0; i < NSP; ++i) {
248  temp[i] = k3[i] - (2.0 / 3.0) * k5[i] + 0.5 * (k6[i] + k7[i] - k4[i]) - (y1[i] - y[i]) / h;
249  }
250  err = h * sc_norm(temp, f_temp);
251 
252  // error of embedded W method
253 
254  for (int i = 0; i < NSP; ++i) {
255  temp[i] = -k1[i] + 2.0 * k2[i] - k4[i] + k7[i] - (y1[i] - y[i]) / h;
256  }
257  //double err_W = h * sc_norm(temp, sc);
258  err = fmax(EPS, fmin(err, h * sc_norm(temp, f_temp)));
259 
260  // classical step size calculation
261  h_new = pow(err, -1.0 / ORD);
262 
263  failures = 0;
264  if (err <= 1.0) {
265 
266  #ifdef LOG_KRYLOV_AND_STEPSIZES
267  fprintf (logFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t, h, err, m, m1, m2);
268  #endif
269 
270  // minimum of classical and Gustafsson step size prediction
271  h_new = fmin(h_new, (h / h_old) * pow((err_old / (err * err)), (1.0 / ORD)));
272 
273  // limit to 0.2 <= (h_new/8) <= 8.0
274  h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);
275 
276  // update y, t and sc
277  memcpy(sc, f_temp, NSP * sizeof(double));
278  memcpy(y, y1, NSP * sizeof(double));
279  t += h;
280 
281  // store time step and error
282  err_old = fmax(1.0e-2, err);
283  h_old = h;
284 
285  // check if last step rejected
286  if (reject) {
287  reject = false;
288  h_new = fmin(h, h_new);
289  }
290  h = fmin(h_new, t_end - t);
291 
292  } else {
293 
294  #ifdef LOG_KRYLOV_AND_STEPSIZES
295  fprintf (rFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t, h, err, m, m1, m2);
296  #endif
297 
298  // limit to 0.2 <= (h_new/8) <= 8.0
299  h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);
300  h_new = fmin(h_new, t_end - t);
301 
302 
303  reject = true;
304  h = fmin(h, h_new);
305  }
306 #else
307  //constant time stepping
308  // update y and t
309  for (int i = 0; i < NSP; ++i) {
310  y[i] = y1[i];
311  }
312 
313  t += h;
314 #endif
315 
316  } // end while
317 
318  #ifdef LOG_KRYLOV_AND_STEPSIZES
319  fclose(logFile);
320  fclose(rFile);
321  #endif
322 
323  return EC_success;
324 
325 }
326 
327 #ifdef GENERATE_DOCS
328 }
329 #endif
Contains header definitions for the RHS function for the van der Pol example.
int integrate(const double t_start, const double t_end, const double pr, double *y)
4th-order exponential integrator function w/ adaptive Kyrlov subspace approximation ...
Definition: exp4.c:46
Definition: exp4.c:32
An example header file that defines system size and other required methods for integration of the van...
__device__ void scale_init(const double *__restrict__ y0, double *__restrict__ sc)
Get scaling for weighted norm for the initial timestep (used in krylov process)
#define P
max order of the phi functions (for error estimation)
Definition: exp4_props.cuh:22
#define EC_consecutive_steps
Maximum number of consecutive internal timesteps with error reached.
Header definition for Jacobian vector multiplier, used in exponential integrators.
void dydt(const double t, const double mu, const double *__restrict__ y, double *__restrict__ dy)
An implementation of the RHS of the van der Pol equation.
Definition: dydt.c:22
void eval_jacob(const double t, const double pres, const double *cy, double *jac)
Computes a finite difference Jacobian of order FD_ORD of the RHS function dydt at the given pressure ...
Definition: fd_jacob.c:24
#define NSP
The IVP system size.
Definition: header.cuh:20
#define STRIDE
the matrix dimensions
#define MAX_STEPS
Maximum allowed internal timesteps per integration step.
Definition: exp4_props.cuh:30
Various macros controlling behaviour of EXP4 algorithm.
#define EC_success
Successful time step.
__device__ void matvec_n_by_m_scale_add(const int m, const double scale, const double *__restrict__ A, const double *__restrict__ V, double *__restrict__ Av, const double *__restrict__ add)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
void sparse_multiplier(const double *A, const double *Vm, double *w)
Implements Jacobian \ vector multiplication in sparse (or unrolled) form.
__device__ void matvec_n_by_m_scale_add_subtract(const int m, const double scale, const double *__restrict__ A, const double *V, double *__restrict__ Av, const double *__restrict__ add, const double *__restrict__ sub)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
__device__ void scale(const double *__restrict__ y0, const double *__restrict__ y1, double *__restrict__ sc)
Get scaling for weighted norm.
#define MAX_CONSECUTIVE_ERRORS
Number of consecutive errors on internal integration steps allowed before exit.
Definition: exp4_props.cuh:32
__device__ void matvec_m_by_m(const int m, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector multiplication of a matrix sized MxM and a vector Mx1.
__device__ int arnoldi(const double scale, const int p, const double h, const double *__restrict__ A, const solver_memory *__restrict__ solver, const double *__restrict__ v, double *__restrict__ beta, double *__restrict__ work, cuDoubleComplex *__restrict__ work2)
Runs the arnoldi iteration to calculate the Krylov projection.
Definition: arnoldi.cuh:51
Contains a header definition for the van der Pol Jacobian evaluation.
#define ORD
order of embedded methods
Definition: exp4_props.cuh:24
#define EC_max_steps_exceeded
Maximum number of internal timesteps exceeded.
__device__ double sc_norm(const double *__restrict__ nums, const double *__restrict__ sc)
Perform weighted norm.
const char * solver_name()
Returns a descriptive solver name.
Definition: exp4_init.c:59
__device__ void matvec_n_by_m_scale(const int m, const double scale, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
Header definitions for solver initialization routins.
Implementation of the arnoldi iteration methods.
#define EPS
Implementation of various linear algebra functions needed in the exponential integrators.
#define EC_h_plus_t_equals_h
Timescale reduced such that t + h == t in floating point math.