accelerInt  v0.1
radau2a.c
Go to the documentation of this file.
1 
21 #include "header.h"
22 #include "solver_props.h"
23 #include "solver_options.h"
24 #include "lapack_dfns.h"
25 #include "dydt.h"
26 #include "jacob.h"
27 #include <complex.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <string.h>
31 #include <math.h>
32 
33 #ifdef GENERATE_DOCS
34 namespace radau2a {
35 #endif
36 
38 #define Max_no_steps (200000)
39 #define NewtonMaxit (8)
41 #define StartNewton (true)
43 #define Gustafsson
45 #define Roundoff (EPS)
47 #define FacMin (0.2)
49 #define FacMax (8)
51 #define FacSafe (0.9)
53 #define FacRej (0.1)
55 #define ThetaMin (0.001)
57 #define NewtonTol (0.03)
59 #define Qmin (1.0)
61 #define Qmax (1.2)
63 //#define UNROLL (8)
65 #define Max_consecutive_errs (5)
66 //#define SDIRK_ERROR
67 
74 static inline void scale (const double * __restrict__ y0,
75  const double* __restrict__ y,
76  double * __restrict__ sc) {
77 
78  for (int i = 0; i < NSP; ++i) {
79  sc[i] = 1.0 / (ATOL + fmax(fabs(y0[i]), fabs(y[i])) * RTOL);
80  }
81 }
82 
88 static inline void scale_init (const double * __restrict__ y0,
89  double * __restrict__ sc) {
90 
91  for (int i = 0; i < NSP; ++i) {
92  sc[i] = 1.0 / (ATOL + fabs(y0[i]) * RTOL);
93  }
94 }
95 
101 const static double rkA[3][3] = { {
102  1.968154772236604258683861429918299e-1,
103  -6.55354258501983881085227825696087e-2,
104  2.377097434822015242040823210718965e-2
105  }, {
106  3.944243147390872769974116714584975e-1,
107  2.920734116652284630205027458970589e-1,
108  -4.154875212599793019818600988496743e-2
109  }, {
110  3.764030627004672750500754423692808e-1,
111  5.124858261884216138388134465196080e-1,
112  1.111111111111111111111111111111111e-1
113  }
114 };
115 
116 const static double rkB[3] = {
117 3.764030627004672750500754423692808e-1,
118 5.124858261884216138388134465196080e-1,
119 1.111111111111111111111111111111111e-1
120 };
121 
122 const static double rkC[3] = {
123 1.550510257216821901802715925294109e-1,
124 6.449489742783178098197284074705891e-1,
125 1.0
126 };
127 
128 #ifdef SDIRK_ERROR
129  // Classical error estimator:
130  // H* Sum (B_j-Bhat_j)*f(Z_j) = H*E(0)*f(0) + Sum E_j*Z_j
131  const static double rkE[4] = {
132  0.02,
133  -10.04880939982741556246032950764708*0.02,
134  1.382142733160748895793662840980412*0.02,
135  -0.3333333333333333333333333333333333*0.02
136  };
137  // H* Sum Bgam_j*f(Z_j) = H*Bgam(0)*f(0) + Sum Theta_j*Z_j
138  const static double rkTheta[3] = {
139  -1.520677486405081647234271944611547 - 10.04880939982741556246032950764708*0.02,
140  2.070455145596436382729929151810376 + 1.382142733160748895793662840980413*0.02,
141  -0.3333333333333333333333333333333333*0.02 - 0.3744441479783868387391430179970741
142  };
143  // ! Sdirk error estimator
144  const static double rkBgam[5] = {
145  0.02,
146  0.3764030627004672750500754423692807-1.558078204724922382431975370686279*0.02,
147  0.8914115380582557157653087040196118*0.02+0.5124858261884216138388134465196077,
148  -0.1637777184845662566367174924883037-0.3333333333333333333333333333333333*0.02,
149  0.2748888295956773677478286035994148
150  };
151 #else
152  // Classical error estimator:
153  // H* Sum (B_j-Bhat_j)*f(Z_j) = H*E(0)*f(0) + Sum E_j*Z_j
154  const static double rkE[4] = {
155  0.05,
156  -10.04880939982741556246032950764708*0.05,
157  1.382142733160748895793662840980412*0.05,
158  -0.3333333333333333333333333333333333*0.05
159  };
160  // H* Sum Bgam_j*f(Z_j) = H*Bgam(0)*f(0) + Sum Theta_j*Z_j
161  const static double rkTheta[3] = {
162  -1.520677486405081647234271944611547 - 10.04880939982741556246032950764708*0.05,
163  2.070455145596436382729929151810376 + 1.382142733160748895793662840980413*0.05,
164  -0.3333333333333333333333333333333333*0.05 - 0.3744441479783868387391430179970741
165  };
166 #endif
167 
168 //Local order of error estimator
169 /*
170 !~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
171 !~~~> Diagonalize the RK matrix:
172 ! rkTinv * inv(rkA) * rkT =
173 ! | rkGamma 0 0 |
174 ! | 0 rkAlpha -rkBeta |
175 ! | 0 rkBeta rkAlpha |
176 !~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
177 */
178 
179 const static double rkGamma = 3.637834252744495732208418513577775;
180 const static double rkAlpha = 2.681082873627752133895790743211112;
181 const static double rkBeta = 3.050430199247410569426377624787569;
182 
183 const static double rkT[3][3] = {
184 {9.443876248897524148749007950641664e-2,
185 -1.412552950209542084279903838077973e-1,
186 -3.00291941051474244918611170890539e-2},
187 {2.502131229653333113765090675125018e-1,
188 2.041293522937999319959908102983381e-1,
189 3.829421127572619377954382335998733e-1},
190 {1.0,
191 1.0,
192 0.0e0}
193 };
194 
195 const static double rkTinv[3][3] =
196 {{4.178718591551904727346462658512057,
197 3.27682820761062387082533272429617e-1,
198 5.233764454994495480399309159089876e-1},
199 {-4.178718591551904727346462658512057,
200 -3.27682820761062387082533272429617e-1,
201 4.766235545005504519600690840910124e-1},
202 {-5.02872634945786875951247343139544e-1,
203 2.571926949855605429186785353601676e0,
204 -5.960392048282249249688219110993024e-1}
205 };
206 
207 const static double rkTinvAinv[3][3] = {
208 {1.520148562492775501049204957366528e+1,
209 1.192055789400527921212348994770778,
210 1.903956760517560343018332287285119},
211 {-9.669512977505946748632625374449567,
212 -8.724028436822336183071773193986487,
213 3.096043239482439656981667712714881},
214 {-1.409513259499574544876303981551774e+1,
215 5.895975725255405108079130152868952,
216 -1.441236197545344702389881889085515e-1}
217 };
218 
219 const static double rkAinvT[3][3] = {
220 {0.3435525649691961614912493915818282,
221 -0.4703191128473198422370558694426832,
222 0.3503786597113668965366406634269080},
223 {0.9102338692094599309122768354288852,
224 1.715425895757991796035292755937326,
225 0.4040171993145015239277111187301784},
226 {3.637834252744495732208418513577775,
227 2.681082873627752133895790743211112,
228 -3.050430199247410569426377624787569}
229 };
230 
231 const static double rkELO = 4;
232 
237 static char TRANS = 'N';
240 static int NRHS = 1;
242 static int ARRSIZE = NSP;
243 
245 
256 static void RK_Decomp(const double H, double* __restrict__ E1,
257  double complex* __restrict__ E2, const double* __restrict__ Jac,
258  int* __restrict__ ipiv1, int* __restrict__ ipiv2, int* __restrict__ info) {
259  double complex temp2 = rkAlpha/H + I * rkBeta/H;
260  double temp1 = rkGamma / H;
261 
262  for (int i = 0; i < NSP; i++)
263  {
264 
265  for(int j = 0; j < NSP; j++)
266  {
267  E1[i + j * NSP] = -Jac[i + j * NSP];
268  E2[i + j * NSP] = -Jac[i + j * NSP] + 0 * I;
269  }
270  E1[i + i * NSP] += temp1;
271  E2[i + i * NSP] += temp2;
272  }
273  dgetrf_(&ARRSIZE, &ARRSIZE, E1, &ARRSIZE, ipiv1, info);
274  if (*info != 0) {
275  return;
276  }
277  zgetrf_(&ARRSIZE, &ARRSIZE, E2, &ARRSIZE, ipiv2, info);
278 }
279 
283 static void RK_Make_Interpolate(const double* __restrict__ Z1, const double* __restrict__ Z2,
284  const double* __restrict__ Z3, double* __restrict__ CONT) {
285  double den = (rkC[2] - rkC[1]) * (rkC[1] - rkC[0]) * (rkC[0] - rkC[2]);
286 
287  for (int i = 0; i < NSP; i++) {
288  CONT[i] = ((-rkC[2] * rkC[2] * rkC[1] * Z1[i] + Z3[i] * rkC[1]* rkC[0] * rkC[0]
289  + rkC[1] * rkC[1] * rkC[2] * Z1[i] - rkC[1] * rkC[1] * rkC[0] * Z3[i]
290  + rkC[2] * rkC[2] * rkC[0] * Z2[i] - Z2[i] * rkC[2] * rkC[0] * rkC[0])
291  /den)-Z3[i];
292  CONT[NSP + i] = -( rkC[0] * rkC[0] * (Z3[i] - Z2[i]) + rkC[1] * rkC[1] * (Z1[i] - Z3[i])
293  + rkC[2] * rkC[2] * (Z2[i] - Z1[i]) )/den;
294  CONT[NSP + NSP + i] = ( rkC[0] * (Z3[i] - Z2[i]) + rkC[1] * (Z1[i] - Z3[i])
295  + rkC[2] * (Z2[i] - Z1[i]) ) / den;
296  }
297 }
298 
302 static void RK_Interpolate(const double H, const double Hold, double* __restrict__ Z1,
303  double* __restrict__ Z2, double* __restrict__ Z3, const double* __restrict__ CONT) {
304  double r = H / Hold;
305  double x1 = 1.0 + rkC[0] * r;
306  double x2 = 1.0 + rkC[1] * r;
307  double x3 = 1.0 + rkC[2] * r;
308 
309  for (int i = 0; i < NSP; i++) {
310  Z1[i] = CONT[i] + x1 * (CONT[NSP + i] + x1 * CONT[NSP + NSP + i]);
311  Z2[i] = CONT[i] + x2 * (CONT[NSP + i] + x2 * CONT[NSP + NSP + i]);
312  Z3[i] = CONT[i] + x2 * (CONT[NSP + i] + x3 * CONT[NSP + NSP + i]);
313  }
314 }
315 
316 
320 static inline void WADD(const double* __restrict__ X, const double* __restrict__ Y,
321  double* __restrict__ Z) {
322 
323  for (int i = 0; i < NSP; i++)
324  {
325  Z[i] = X[i] + Y[i];
326  }
327 }
328 
338 static inline void DAXPY3(const double DA1, const double DA2, const double DA3,
339  const double* __restrict__ DX, double* __restrict__ DY1,
340  double* __restrict__ DY2, double* __restrict__ DY3) {
341 
342  for (int i = 0; i < NSP; i++) {
343  DY1[i] += DA1 * DX[i];
344  DY2[i] += DA2 * DX[i];
345  DY3[i] += DA3 * DX[i];
346  }
347 }
348 
353 static void RK_PrepareRHS(const double t, const double pr, const double H,
354  const double* __restrict__ Y, const double* __restrict__ Z1,
355  const double* __restrict__ Z2, const double* __restrict__ Z3,
356  double* __restrict__ R1, double* __restrict__ R2, double* __restrict__ R3) {
357  double TMP[NSP];
358  double F[NSP];
359 
360  for (int i = 0; i < NSP; i++) {
361  R1[i] = Z1[i];
362  R2[i] = Z2[i];
363  R3[i] = Z3[i];
364  }
365 
366  // TMP = Y + Z1
367  WADD(Y, Z1, TMP);
368  dydt(t + rkC[0] * H, pr, TMP, F);
369  //R[:] -= -h * rkA[:][0] * F[:]
370  DAXPY3(-H * rkA[0][0], -H * rkA[1][0], -H * rkA[2][0], F, R1, R2, R3);
371 
372  // TMP = Y + Z2
373  WADD(Y, Z2, TMP);
374  dydt(t + rkC[1] * H, pr, TMP, F);
375  //R[:] -= -h * rkA[:][1] * F[:]
376  DAXPY3(-H * rkA[0][1], -H * rkA[1][1], -H * rkA[2][1], F, R1, R2, R3);
377 
378  // TMP = Y + Z3
379  WADD(Y, Z3, TMP);
380  dydt(t + rkC[2] * H, pr, TMP, F);
381  //R[:] -= -h * rkA[:][2] * F[:]
382  DAXPY3(-H * rkA[0][2], -H * rkA[1][2], -H * rkA[2][2], F, R1, R2, R3);
383 }
384 
388 static void RK_Solve(const double H, double* __restrict__ E1,
389  double complex* __restrict__ E2, double* __restrict__ R1,
390  double* __restrict__ R2, double* __restrict__ R3, int* __restrict__ ipiv1,
391  int* __restrict__ ipiv2) {
392  // Z = (1/h) T^(-1) A^(-1) * Z
393 
394  for(int i = 0; i < NSP; i++)
395  {
396  double x1 = R1[i] / H;
397  double x2 = R2[i] / H;
398  double x3 = R3[i] / H;
399  R1[i] = rkTinvAinv[0][0] * x1 + rkTinvAinv[0][1] * x2 + rkTinvAinv[0][2] * x3;
400  R2[i] = rkTinvAinv[1][0] * x1 + rkTinvAinv[1][1] * x2 + rkTinvAinv[1][2] * x3;
401  R3[i] = rkTinvAinv[2][0] * x1 + rkTinvAinv[2][1] * x2 + rkTinvAinv[2][2] * x3;
402  }
403  int info = 0;
404  dgetrs_ (&TRANS, &ARRSIZE, &NRHS, E1, &ARRSIZE, ipiv1, R1, &ARRSIZE, &info);
405 #ifdef DEBUG
406  if (info != 0) {
407  printf("Error in back-substitution\n");
408  exit(-1);
409  }
410 #endif
411  double complex temp[NSP];
412 
413  for (int i = 0; i < NSP; ++i)
414  {
415  temp[i] = R2[i] + I * R3[i];
416  }
417  zgetrs_(&TRANS, &ARRSIZE, &NRHS, E2, &ARRSIZE, ipiv2, temp, &ARRSIZE, &info);
418 #ifdef DEBUG
419  if (info != 0) {
420  printf("Error in back-substitution\n");
421  exit(-1);
422  }
423 #endif
424 
425  for (int i = 0; i < NSP; ++i)
426  {
427  R2[i] = creal(temp[i]);
428  R3[i] = cimag(temp[i]);
429  }
430 
431  // Z = T * Z
432 
433  for (int i = 0; i < NSP; ++i) {
434  double x1 = R1[i];
435  double x2 = R2[i];
436  double x3 = R3[i];
437  R1[i] = rkT[0][0] * x1 + rkT[0][1] * x2 + rkT[0][2] * x3;
438  R2[i] = rkT[1][0] * x1 + rkT[1][1] * x2 + rkT[1][2] * x3;
439  R3[i] = rkT[2][0] * x1 + rkT[2][1] * x2 + rkT[2][2] * x3;
440  }
441 }
442 
446 static inline double RK_ErrorNorm(const double* __restrict__ scale, double* __restrict__ DY) {
447 
448  double sum = 0;
449  for (int i = 0; i < NSP; ++i){
450  sum += (scale[i] * scale[i] * DY[i] * DY[i]);
451  }
452  return fmax(sqrt(sum / ((double)NSP)), 1e-10);
453 }
454 
458 static double RK_ErrorEstimate(const double H, const double t, const double pr,
459  const double* __restrict__ Y, const double* __restrict__ F0,
460  const double* __restrict__ Z1, const double* __restrict__ Z2, const double* __restrict__ Z3,
461  const double* __restrict__ scale, double* __restrict__ E1, int* __restrict__ ipiv1,
462  const bool FirstStep, const bool Reject) {
463  double HrkE1 = rkE[1]/H;
464  double HrkE2 = rkE[2]/H;
465  double HrkE3 = rkE[3]/H;
466 
467  double F1[NSP] = {0};
468  double F2[NSP] = {0};
469  double TMP[NSP] = {0};
470 
471  for (int i = 0; i < NSP; ++i) {
472  F2[i] = HrkE1 * Z1[i] + HrkE2 * Z2[i] + HrkE3 * Z3[i];
473  }
474 
475  for (int i = 0; i < NSP; ++i) {
476  TMP[i] = rkE[0] * F0[i] + F2[i];
477  }
478  int info = 0;
479  dgetrs_ (&TRANS, &ARRSIZE, &NRHS, E1, &ARRSIZE, ipiv1, TMP, &ARRSIZE, &info);
480 #ifdef DEBUG
481  //this is only true on an incorrect call of dgetrs, hence ignore
482  if (info != 0) {
483  printf("Error on back-substitution.");
484  exit(-1);
485  }
486 #endif
487  double Err = RK_ErrorNorm(scale, TMP);
488  if (Err >= 1.0 && (FirstStep || Reject)) {
489 
490  for (int i = 0; i < NSP; i++) {
491  TMP[i] += Y[i];
492  }
493  dydt(t, pr, TMP, F1);
494 
495  for (int i = 0; i < NSP; i++) {
496  TMP[i] = F1[i] + F2[i];
497  }
498  dgetrs_ (&TRANS, &ARRSIZE, &NRHS, E1, &ARRSIZE, ipiv1, TMP, &ARRSIZE, &info);
499 #ifdef DEBUG
500  if (info != 0) {
501  printf("Error on back-substitution.");
502  exit(-1);
503  }
504 #endif
505  Err = RK_ErrorNorm(scale, TMP);
506  }
507  return Err;
508 }
509 
520 int integrate (const double t_start, const double t_end, const double pr, double* y) {
521  double Hmin = 0;
522  double Hold = 0;
523 #ifdef Gustafsson
524  double Hacc = 0;
525  double ErrOld = 0;
526 #endif
527 #ifdef CONST_TIME_STEP
528  double H = t_end - t_start;
529 #else
530  double H = fmin(5e-7, t_end - t_start);
531 #endif
532  double Hnew;
533  double t = t_start;
534  bool Reject = false;
535  bool FirstStep = true;
536  bool SkipJac = false;
537  bool SkipLU = false;
538  double sc[NSP];
539  double A[NSP * NSP] = {0.0};
540  double E1[NSP * NSP] = {0};
541  double complex E2[NSP * NSP] = {0};
542  int ipiv1[NSP] = {0};
543  int ipiv2[NSP] = {0};
544  double Z1[NSP] = {0};
545  double Z2[NSP] = {0};
546  double Z3[NSP] = {0};
547 #ifdef SDIRK_ERROR
548  double Z4[NSP] = {0};
549  double DZ4[NSP] = {0};
550  double G[NSP] = {0};
551  double TMP[NSP] = {0};
552 #endif
553  double DZ1[NSP] = {0};
554  double DZ2[NSP] = {0};
555  double DZ3[NSP] = {0};
556  double CONT[NSP * 3] = {0};
557  scale_init(y, sc);
558  double y0[NSP];
559  memcpy(y0, y, NSP * sizeof(double));
560  double F0[NSP];
561  int info = 0;
562  int Nconsecutive = 0;
563  int Nsteps = 0;
564  double NewtonRate = pow(2.0, 1.25);
565 
566  while (t + Roundoff < t_end) {
567  if (!Reject) {
568  dydt (t, pr, y, F0);
569  }
570  if (!SkipLU) {
571  //need to update Jac/LU
572  if (!SkipJac) {
573  eval_jacob (t, pr, y, A);
574  }
575  RK_Decomp(H, E1, E2, A, ipiv1, ipiv2, &info);
576  if (info != 0) {
577  Nconsecutive += 1;
578  if (Nconsecutive >= Max_consecutive_errs)
579  {
580  return EC_consecutive_steps;
581  }
582  H *= 0.5;
583  Reject = true;
584  SkipJac = true;
585  SkipLU = false;
586  continue;
587  }
588  else
589  {
590  Nconsecutive = 0;
591  }
592  }
593  Nsteps += 1;
594  if (Nsteps >= Max_no_steps) {
595  return EC_max_steps_exceeded;
596  }
597  if (0.1 * fabs(H) <= fabs(t) * Roundoff) {
598  return EC_h_plus_t_equals_h;
599  }
600  if (FirstStep || !StartNewton) {
601  memset(Z1, 0, NSP * sizeof(double));
602  memset(Z2, 0, NSP * sizeof(double));
603  memset(Z3, 0, NSP * sizeof(double));
604  } else {
605  RK_Interpolate(H, Hold, Z1, Z2, Z3, CONT);
606  }
607  bool NewtonDone = false;
608  double NewtonIncrementOld = 0;
609  double Fac = 0.5; //Step reduction if too many iterations
610  int NewtonIter = 0;
611  double Theta = 0;
612 
613  //reuse previous NewtonRate
614  NewtonRate = pow(fmax(NewtonRate, EPS), 0.8);
615 
616  for (; NewtonIter < NewtonMaxit; NewtonIter++) {
617  RK_PrepareRHS(t, pr, H, y, Z1, Z2, Z3, DZ1, DZ2, DZ3);
618  RK_Solve(H, E1, E2, DZ1, DZ2, DZ3, ipiv1, ipiv2);
619  double d1 = RK_ErrorNorm(sc, DZ1);
620  double d2 = RK_ErrorNorm(sc, DZ2);
621  double d3 = RK_ErrorNorm(sc, DZ3);
622  double NewtonIncrement = sqrt((d1 * d1 + d2 * d2 + d3 * d3) / 3.0);
623  Theta = ThetaMin;
624  if (NewtonIter > 0)
625  {
626  Theta = NewtonIncrement / NewtonIncrementOld;
627  if (Theta < 0.99) {
628  NewtonRate = Theta / (1.0 - Theta);
629  }
630  else { // Non-convergence of Newton: Theta too large
631  break;
632  }
633  if (NewtonIter < NewtonMaxit) {
634  //Predict error at the end of Newton process
635  double NewtonPredictedErr = (NewtonIncrement * pow(Theta, (NewtonMaxit - NewtonIter - 1))) / (1.0 - Theta);
636  if (NewtonPredictedErr >= NewtonTol) {
637  //Non-convergence of Newton: predicted error too large
638  double Qnewton = fmin(10.0, NewtonPredictedErr / NewtonTol);
639  Fac = 0.8 * pow(Qnewton, -1.0/((double)(NewtonMaxit-NewtonIter)));
640  break;
641  }
642  }
643  }
644 
645  NewtonIncrementOld = fmax(NewtonIncrement, Roundoff);
646  // Update solution
647 
648  for (int i = 0; i < NSP; i++)
649  {
650  Z1[i] -= DZ1[i];
651  Z2[i] -= DZ2[i];
652  Z3[i] -= DZ3[i];
653  }
654 
655  NewtonDone = (NewtonRate * NewtonIncrement <= NewtonTol);
656  if (NewtonDone) break;
657  if (NewtonIter == NewtonMaxit - 1) {
659  }
660  }
661 #ifndef CONST_TIME_STEP
662  if (!NewtonDone) {
663  H = Fac * H;
664  Reject = true;
665  SkipJac = true;
666  SkipLU = false;
667  continue;
668  }
669 
670  double Err = RK_ErrorEstimate(H, t, pr, y, F0, Z1, Z2, Z3, sc, E1, ipiv1, FirstStep, Reject);
671  //~~~> Computation of new step size Hnew
672  Fac = pow(Err, (-1.0 / rkELO)) * (1.0 + 2 * NewtonMaxit) / (NewtonIter + 1.0 + 2 * NewtonMaxit);
673  Fac = fmin(FacMax, fmax(FacMin, Fac));
674  Hnew = Fac * H;
675  if (Err < 1.0) {
676 #ifdef Gustafsson
677  if (!FirstStep) {
678  double FacGus = FacSafe * (H / Hacc) * pow(Err * Err / ErrOld, -0.25);
679  FacGus = fmin(FacMax, fmax(FacMin, FacGus));
680  Fac = fmin(Fac, FacGus);
681  Hnew = Fac * H;
682  }
683  Hacc = H;
684  ErrOld = fmax(1e-2, Err);
685 #endif
686  FirstStep = false;
687  Hold = H;
688  t += H;
689 
690  for (int i = 0; i < NSP; i++) {
691  y[i] += Z3[i];
692  }
693  // Construct the solution quadratic interpolant Q(c_i) = Z_i, i=1:3
694  if (StartNewton) {
695  RK_Make_Interpolate(Z1, Z2, Z3, CONT);
696  }
697  scale(y, y0, sc);
698  memcpy(y0, y, NSP * sizeof(double));
699  Hnew = fmin(fmax(Hnew, Hmin), t_end - t);
700  if (Reject) {
701  Hnew = fmin(Hnew, H);
702  }
703  Reject = false;
704  if (t + Hnew / Qmin - t_end >= 0.0) {
705  H = t_end - t;
706  } else {
707  double Hratio = Hnew / H;
708  // Reuse the LU decomposition
709  SkipLU = (Theta <= ThetaMin) && (Hratio>=Qmin) && (Hratio<=Qmax);
710  if (!SkipLU) H = Hnew;
711  }
712  // If convergence is fast enough, do not update Jacobian
713  SkipJac = NewtonIter == 1 || NewtonRate <= ThetaMin;
714  }
715  else {
716  if (FirstStep || Reject) {
717  H = FacRej * H;
718  } else {
719  H = Hnew;
720  }
721  Reject = true;
722  SkipJac = true;
723  SkipLU = false;
724  }
725 #else
726  //constant time stepping
727  //update y & t
728  t += H;
729 
730  for (int i = 0; i < NSP; i++) {
731  y[i] += Z3[i];
732  }
733 #endif
734  }
735  return EC_success;
736 }
737 
738 #ifdef GENERATE_DOCS
739 }
740 #endif
static void DAXPY3(const double DA1, const double DA2, const double DA3, const double *__restrict__ DX, double *__restrict__ DY1, double *__restrict__ DY2, double *__restrict__ DY3)
Sepcialization of DAXPY with unrolled (or at least bounds known at compile time) loops.
Definition: radau2a.c:338
Contains header definitions for the RHS function for the van der Pol example.
#define FacRej
Time step factor on rejected step.
Definition: radau2a.c:54
#define Qmin
Min Timestep ratio to skip LU decomposition.
Definition: radau2a.c:60
#define FacSafe
Safety factor for Gustafsson time stepping control.
Definition: radau2a.c:52
An example header file that defines system size and other required methods for integration of the van...
static const double rkBeta
Definition: radau2a.c:181
#define EC_consecutive_steps
Maximum number of consecutive internal timesteps with error reached.
void dydt(const double t, const double mu, const double *__restrict__ y, double *__restrict__ dy)
An implementation of the RHS of the van der Pol equation.
Definition: dydt.c:22
void eval_jacob(const double t, const double pres, const double *cy, double *jac)
Computes a finite difference Jacobian of order FD_ORD of the RHS function dydt at the given pressure ...
Definition: fd_jacob.c:24
#define Qmax
Max Timestep ratio to skip LU decomposition.
Definition: radau2a.c:62
static const double rkELO
Definition: radau2a.c:231
static const double rkTinv[3][3]
Definition: radau2a.c:195
static double RK_ErrorEstimate(const double H, const double t, const double pr, const double *__restrict__ Y, const double *__restrict__ F0, const double *__restrict__ Z1, const double *__restrict__ Z2, const double *__restrict__ Z3, const double *__restrict__ scale, double *__restrict__ E1, int *__restrict__ ipiv1, const bool FirstStep, const bool Reject)
Computes and returns the error estimate for this step.
Definition: radau2a.c:458
static const double rkT[3][3]
Definition: radau2a.c:183
static const double rkTinvAinv[3][3]
Definition: radau2a.c:207
static int NRHS
Lapack - 1 RHS solve.
Definition: radau2a.c:240
#define ThetaMin
Minimum Newton convergence rate.
Definition: radau2a.c:56
static void scale(const double *__restrict__ y0, const double *__restrict__ y, double *__restrict__ sc)
Computes error weight scaling from initial and current state.
Definition: radau2a.c:74
#define NSP
The IVP system size.
Definition: header.cuh:20
void zgetrs_(const char *trans, const int *n, const int *nrhs, double complex *A, const int *LDA, int *ipiv, double complex *B, const int *LDB, int *info)
A file generated by Scons that specifies various options to the solvers.
static const double rkGamma
Definition: radau2a.c:179
static const double rkC[3]
Definition: radau2a.c:122
static void scale_init(const double *__restrict__ y0, double *__restrict__ sc)
Computes error weight scaling from initial state.
Definition: radau2a.c:88
#define NewtonTol
Newton convergence tolerance.
Definition: radau2a.c:58
static const double rkAinvT[3][3]
Definition: radau2a.c:219
#define Roundoff
Smallist representable double precision number.
Definition: radau2a.c:46
#define RTOL
#define EC_success
Successful time step.
External lapack routine definitions.
static void RK_PrepareRHS(const double t, const double pr, const double H, const double *__restrict__ Y, const double *__restrict__ Z1, const double *__restrict__ Z2, const double *__restrict__ Z3, double *__restrict__ R1, double *__restrict__ R2, double *__restrict__ R3)
Prepare the right-hand side for Newton iterations: .
Definition: radau2a.c:353
#define ATOL
void dgetrf_(const int *m, const int *n, double *A, const int *lda, int *ipiv, int *info)
simple convenience file to include the correct solver properties file
#define Max_consecutive_errs
Error allowed on this many consecutive internal timesteps before exit.
Definition: radau2a.c:65
static int ARRSIZE
Lapack - Array size.
Definition: radau2a.c:242
#define StartNewton
Use quadratic interpolation from previous step if possible.
Definition: radau2a.c:42
int integrate(const double t_start, const double t_end, const double pr, double *y)
5th-order Radau2A CPU implementation
Definition: radau2a.c:520
#define NewtonMaxit
Maximum number of allowed Newton iteration steps before error.
Definition: radau2a.c:40
static double RK_ErrorNorm(const double *__restrict__ scale, double *__restrict__ DY)
Computes the scaled error norm from the given scale and DY vectors.
Definition: radau2a.c:446
#define FacMax
Controls maximum increase in timestep size.
Definition: radau2a.c:50
static void WADD(const double *__restrict__ X, const double *__restrict__ Y, double *__restrict__ Z)
Performs with unrolled (or at least bounds known at compile time) loops.
Definition: radau2a.c:320
static const double rkAlpha
Definition: radau2a.c:180
void dgetrs_(const char *trans, const int *n, const int *nrhs, double *A, const int *LDA, int *ipiv, double *B, const int *LDB, int *info)
Contains a header definition for the van der Pol Jacobian evaluation.
static const double rkB[3]
Definition: radau2a.c:116
#define EC_newton_max_iterations_exceeded
Maximum allowed Newton Iteration steps exceeded.
static void RK_Interpolate(const double H, const double Hold, double *__restrict__ Z1, double *__restrict__ Z2, double *__restrict__ Z3, const double *__restrict__ CONT)
Apply quadaratic interpolate to get initial values.
Definition: radau2a.c:302
#define EC_max_steps_exceeded
Maximum number of internal timesteps exceeded.
#define FacMin
Controls maximum decrease in timestep size.
Definition: radau2a.c:48
static void RK_Decomp(const double H, double *__restrict__ E1, double complex *__restrict__ E2, const double *__restrict__ Jac, int *__restrict__ ipiv1, int *__restrict__ ipiv2, int *__restrict__ info)
Compute E1 & E2 matricies and their LU Decomposition.
Definition: radau2a.c:256
static const double rkTheta[3]
Definition: radau2a.c:161
void zgetrf_(const int *m, const int *n, double complex *A, const int *lda, int *ipiv, int *info)
static void RK_Solve(const double H, double *__restrict__ E1, double complex *__restrict__ E2, double *__restrict__ R1, double *__restrict__ R2, double *__restrict__ R3, int *__restrict__ ipiv1, int *__restrict__ ipiv2)
Solves for the RHS values in the Newton iteration.
Definition: radau2a.c:388
#define EPS
static char TRANS
Lapack - non-transpose.
Definition: radau2a.c:238
#define Max_no_steps
Maximum number of allowed internal timesteps before error.
Definition: radau2a.c:38
static const double rkE[4]
Definition: radau2a.c:154
static const double rkA[3][3]
Definition: radau2a.c:101
#define EC_h_plus_t_equals_h
Timescale reduced such that t + h == t in floating point math.
static void RK_Make_Interpolate(const double *__restrict__ Z1, const double *__restrict__ Z2, const double *__restrict__ Z3, double *__restrict__ CONT)
Compute Quadaratic interpolate.
Definition: radau2a.c:283