38 #define Max_no_steps (200000) 39 #define NewtonMaxit (8) 41 #define StartNewton (true) 45 #define Roundoff (EPS) 55 #define ThetaMin (0.001) 57 #define NewtonTol (0.03) 65 #define Max_consecutive_errs (5) 74 static inline void scale (
const double * __restrict__ y0,
75 const double* __restrict__ y,
76 double * __restrict__ sc) {
78 for (
int i = 0; i <
NSP; ++i) {
79 sc[i] = 1.0 / (
ATOL + fmax(fabs(y0[i]), fabs(y[i])) *
RTOL);
88 static inline void scale_init (
const double * __restrict__ y0,
89 double * __restrict__ sc) {
91 for (
int i = 0; i <
NSP; ++i) {
92 sc[i] = 1.0 / (
ATOL + fabs(y0[i]) *
RTOL);
101 const static double rkA[3][3] = { {
102 1.968154772236604258683861429918299e-1,
103 -6.55354258501983881085227825696087e-2,
104 2.377097434822015242040823210718965e-2
106 3.944243147390872769974116714584975e-1,
107 2.920734116652284630205027458970589e-1,
108 -4.154875212599793019818600988496743e-2
110 3.764030627004672750500754423692808e-1,
111 5.124858261884216138388134465196080e-1,
112 1.111111111111111111111111111111111e-1
116 const static double rkB[3] = {
117 3.764030627004672750500754423692808e-1,
118 5.124858261884216138388134465196080e-1,
119 1.111111111111111111111111111111111e-1
122 const static double rkC[3] = {
123 1.550510257216821901802715925294109e-1,
124 6.449489742783178098197284074705891e-1,
131 const static double rkE[4] = {
133 -10.04880939982741556246032950764708*0.02,
134 1.382142733160748895793662840980412*0.02,
135 -0.3333333333333333333333333333333333*0.02
138 const static double rkTheta[3] = {
139 -1.520677486405081647234271944611547 - 10.04880939982741556246032950764708*0.02,
140 2.070455145596436382729929151810376 + 1.382142733160748895793662840980413*0.02,
141 -0.3333333333333333333333333333333333*0.02 - 0.3744441479783868387391430179970741
144 const static double rkBgam[5] = {
146 0.3764030627004672750500754423692807-1.558078204724922382431975370686279*0.02,
147 0.8914115380582557157653087040196118*0.02+0.5124858261884216138388134465196077,
148 -0.1637777184845662566367174924883037-0.3333333333333333333333333333333333*0.02,
149 0.2748888295956773677478286035994148
154 const static double rkE[4] = {
156 -10.04880939982741556246032950764708*0.05,
157 1.382142733160748895793662840980412*0.05,
158 -0.3333333333333333333333333333333333*0.05
162 -1.520677486405081647234271944611547 - 10.04880939982741556246032950764708*0.05,
163 2.070455145596436382729929151810376 + 1.382142733160748895793662840980413*0.05,
164 -0.3333333333333333333333333333333333*0.05 - 0.3744441479783868387391430179970741
179 const static double rkGamma = 3.637834252744495732208418513577775;
180 const static double rkAlpha = 2.681082873627752133895790743211112;
181 const static double rkBeta = 3.050430199247410569426377624787569;
183 const static double rkT[3][3] = {
184 {9.443876248897524148749007950641664e-2,
185 -1.412552950209542084279903838077973e-1,
186 -3.00291941051474244918611170890539e-2},
187 {2.502131229653333113765090675125018e-1,
188 2.041293522937999319959908102983381e-1,
189 3.829421127572619377954382335998733e-1},
196 {{4.178718591551904727346462658512057,
197 3.27682820761062387082533272429617e-1,
198 5.233764454994495480399309159089876e-1},
199 {-4.178718591551904727346462658512057,
200 -3.27682820761062387082533272429617e-1,
201 4.766235545005504519600690840910124e-1},
202 {-5.02872634945786875951247343139544e-1,
203 2.571926949855605429186785353601676e0,
204 -5.960392048282249249688219110993024e-1}
208 {1.520148562492775501049204957366528e+1,
209 1.192055789400527921212348994770778,
210 1.903956760517560343018332287285119},
211 {-9.669512977505946748632625374449567,
212 -8.724028436822336183071773193986487,
213 3.096043239482439656981667712714881},
214 {-1.409513259499574544876303981551774e+1,
215 5.895975725255405108079130152868952,
216 -1.441236197545344702389881889085515e-1}
220 {0.3435525649691961614912493915818282,
221 -0.4703191128473198422370558694426832,
222 0.3503786597113668965366406634269080},
223 {0.9102338692094599309122768354288852,
224 1.715425895757991796035292755937326,
225 0.4040171993145015239277111187301784},
226 {3.637834252744495732208418513577775,
227 2.681082873627752133895790743211112,
228 -3.050430199247410569426377624787569}
237 static char TRANS =
'N';
256 static void RK_Decomp(
const double H,
double* __restrict__ E1,
257 double complex* __restrict__ E2,
const double* __restrict__ Jac,
258 int* __restrict__ ipiv1,
int* __restrict__ ipiv2,
int* __restrict__ info) {
262 for (
int i = 0; i <
NSP; i++)
265 for(
int j = 0; j <
NSP; j++)
267 E1[i + j *
NSP] = -Jac[i + j *
NSP];
268 E2[i + j *
NSP] = -Jac[i + j *
NSP] + 0 * I;
270 E1[i + i *
NSP] += temp1;
271 E2[i + i *
NSP] += temp2;
284 const double* __restrict__ Z3,
double* __restrict__ CONT) {
287 for (
int i = 0; i <
NSP; i++) {
292 CONT[
NSP + i] = -(
rkC[0] *
rkC[0] * (Z3[i] - Z2[i]) +
rkC[1] *
rkC[1] * (Z1[i] - Z3[i])
293 +
rkC[2] *
rkC[2] * (Z2[i] - Z1[i]) )/den;
294 CONT[
NSP +
NSP + i] = (
rkC[0] * (Z3[i] - Z2[i]) +
rkC[1] * (Z1[i] - Z3[i])
295 +
rkC[2] * (Z2[i] - Z1[i]) ) / den;
302 static void RK_Interpolate(
const double H,
const double Hold,
double* __restrict__ Z1,
303 double* __restrict__ Z2,
double* __restrict__ Z3,
const double* __restrict__ CONT) {
305 double x1 = 1.0 +
rkC[0] * r;
306 double x2 = 1.0 +
rkC[1] * r;
307 double x3 = 1.0 +
rkC[2] * r;
309 for (
int i = 0; i <
NSP; i++) {
310 Z1[i] = CONT[i] + x1 * (CONT[
NSP + i] + x1 * CONT[
NSP +
NSP + i]);
311 Z2[i] = CONT[i] + x2 * (CONT[
NSP + i] + x2 * CONT[
NSP +
NSP + i]);
312 Z3[i] = CONT[i] + x2 * (CONT[
NSP + i] + x3 * CONT[
NSP +
NSP + i]);
320 static inline void WADD(
const double* __restrict__ X,
const double* __restrict__ Y,
321 double* __restrict__ Z) {
323 for (
int i = 0; i <
NSP; i++)
338 static inline void DAXPY3(
const double DA1,
const double DA2,
const double DA3,
339 const double* __restrict__ DX,
double* __restrict__ DY1,
340 double* __restrict__ DY2,
double* __restrict__ DY3) {
342 for (
int i = 0; i <
NSP; i++) {
343 DY1[i] += DA1 * DX[i];
344 DY2[i] += DA2 * DX[i];
345 DY3[i] += DA3 * DX[i];
354 const double* __restrict__ Y,
const double* __restrict__ Z1,
355 const double* __restrict__ Z2,
const double* __restrict__ Z3,
356 double* __restrict__ R1,
double* __restrict__ R2,
double* __restrict__ R3) {
360 for (
int i = 0; i <
NSP; i++) {
368 dydt(t +
rkC[0] * H, pr, TMP, F);
374 dydt(t +
rkC[1] * H, pr, TMP, F);
380 dydt(t +
rkC[2] * H, pr, TMP, F);
388 static void RK_Solve(
const double H,
double* __restrict__ E1,
389 double complex* __restrict__ E2,
double* __restrict__ R1,
390 double* __restrict__ R2,
double* __restrict__ R3,
int* __restrict__ ipiv1,
391 int* __restrict__ ipiv2) {
394 for(
int i = 0; i <
NSP; i++)
396 double x1 = R1[i] / H;
397 double x2 = R2[i] / H;
398 double x3 = R3[i] / H;
407 printf(
"Error in back-substitution\n");
411 double complex temp[
NSP];
413 for (
int i = 0; i <
NSP; ++i)
415 temp[i] = R2[i] + I * R3[i];
420 printf(
"Error in back-substitution\n");
425 for (
int i = 0; i <
NSP; ++i)
427 R2[i] = creal(temp[i]);
428 R3[i] = cimag(temp[i]);
433 for (
int i = 0; i <
NSP; ++i) {
437 R1[i] =
rkT[0][0] * x1 +
rkT[0][1] * x2 +
rkT[0][2] * x3;
438 R2[i] =
rkT[1][0] * x1 +
rkT[1][1] * x2 +
rkT[1][2] * x3;
439 R3[i] =
rkT[2][0] * x1 +
rkT[2][1] * x2 +
rkT[2][2] * x3;
449 for (
int i = 0; i <
NSP; ++i){
452 return fmax(sqrt(sum / ((
double)
NSP)), 1e-10);
459 const double* __restrict__ Y,
const double* __restrict__ F0,
460 const double* __restrict__ Z1,
const double* __restrict__ Z2,
const double* __restrict__ Z3,
461 const double* __restrict__
scale,
double* __restrict__ E1,
int* __restrict__ ipiv1,
462 const bool FirstStep,
const bool Reject) {
463 double HrkE1 =
rkE[1]/H;
464 double HrkE2 =
rkE[2]/H;
465 double HrkE3 =
rkE[3]/H;
467 double F1[
NSP] = {0};
468 double F2[
NSP] = {0};
469 double TMP[
NSP] = {0};
471 for (
int i = 0; i <
NSP; ++i) {
472 F2[i] = HrkE1 * Z1[i] + HrkE2 * Z2[i] + HrkE3 * Z3[i];
475 for (
int i = 0; i <
NSP; ++i) {
476 TMP[i] =
rkE[0] * F0[i] + F2[i];
483 printf(
"Error on back-substitution.");
488 if (Err >= 1.0 && (FirstStep || Reject)) {
490 for (
int i = 0; i <
NSP; i++) {
493 dydt(t, pr, TMP, F1);
495 for (
int i = 0; i <
NSP; i++) {
496 TMP[i] = F1[i] + F2[i];
501 printf(
"Error on back-substitution.");
520 int integrate (
const double t_start,
const double t_end,
const double pr,
double* y) {
527 #ifdef CONST_TIME_STEP 528 double H = t_end - t_start;
530 double H = fmin(5e-7, t_end - t_start);
535 bool FirstStep =
true;
536 bool SkipJac =
false;
539 double A[
NSP *
NSP] = {0.0};
540 double E1[
NSP *
NSP] = {0};
541 double complex E2[
NSP *
NSP] = {0};
542 int ipiv1[
NSP] = {0};
543 int ipiv2[
NSP] = {0};
544 double Z1[
NSP] = {0};
545 double Z2[
NSP] = {0};
546 double Z3[
NSP] = {0};
548 double Z4[
NSP] = {0};
549 double DZ4[
NSP] = {0};
551 double TMP[
NSP] = {0};
553 double DZ1[
NSP] = {0};
554 double DZ2[
NSP] = {0};
555 double DZ3[
NSP] = {0};
556 double CONT[
NSP * 3] = {0};
559 memcpy(y0, y,
NSP *
sizeof(
double));
562 int Nconsecutive = 0;
564 double NewtonRate = pow(2.0, 1.25);
575 RK_Decomp(H, E1, E2, A, ipiv1, ipiv2, &info);
597 if (0.1 * fabs(H) <= fabs(t) *
Roundoff) {
601 memset(Z1, 0,
NSP *
sizeof(
double));
602 memset(Z2, 0,
NSP *
sizeof(
double));
603 memset(Z3, 0,
NSP *
sizeof(
double));
607 bool NewtonDone =
false;
608 double NewtonIncrementOld = 0;
614 NewtonRate = pow(fmax(NewtonRate,
EPS), 0.8);
618 RK_Solve(H, E1, E2, DZ1, DZ2, DZ3, ipiv1, ipiv2);
622 double NewtonIncrement = sqrt((d1 * d1 + d2 * d2 + d3 * d3) / 3.0);
626 Theta = NewtonIncrement / NewtonIncrementOld;
628 NewtonRate = Theta / (1.0 - Theta);
635 double NewtonPredictedErr = (NewtonIncrement * pow(Theta, (
NewtonMaxit - NewtonIter - 1))) / (1.0 - Theta);
638 double Qnewton = fmin(10.0, NewtonPredictedErr /
NewtonTol);
639 Fac = 0.8 * pow(Qnewton, -1.0/((
double)(
NewtonMaxit-NewtonIter)));
645 NewtonIncrementOld = fmax(NewtonIncrement,
Roundoff);
648 for (
int i = 0; i <
NSP; i++)
655 NewtonDone = (NewtonRate * NewtonIncrement <=
NewtonTol);
656 if (NewtonDone)
break;
661 #ifndef CONST_TIME_STEP 670 double Err =
RK_ErrorEstimate(H, t, pr, y, F0, Z1, Z2, Z3, sc, E1, ipiv1, FirstStep, Reject);
678 double FacGus =
FacSafe * (H / Hacc) * pow(Err * Err / ErrOld, -0.25);
680 Fac = fmin(Fac, FacGus);
684 ErrOld = fmax(1e-2, Err);
690 for (
int i = 0; i <
NSP; i++) {
698 memcpy(y0, y,
NSP *
sizeof(
double));
699 Hnew = fmin(fmax(Hnew, Hmin), t_end - t);
701 Hnew = fmin(Hnew, H);
704 if (t + Hnew /
Qmin - t_end >= 0.0) {
707 double Hratio = Hnew / H;
709 SkipLU = (Theta <= ThetaMin) && (Hratio>=
Qmin) && (Hratio<=
Qmax);
710 if (!SkipLU) H = Hnew;
713 SkipJac = NewtonIter == 1 || NewtonRate <=
ThetaMin;
716 if (FirstStep || Reject) {
730 for (
int i = 0; i <
NSP; i++) {
static void DAXPY3(const double DA1, const double DA2, const double DA3, const double *__restrict__ DX, double *__restrict__ DY1, double *__restrict__ DY2, double *__restrict__ DY3)
Sepcialization of DAXPY with unrolled (or at least bounds known at compile time) loops.
Contains header definitions for the RHS function for the van der Pol example.
#define FacRej
Time step factor on rejected step.
#define Qmin
Min Timestep ratio to skip LU decomposition.
#define FacSafe
Safety factor for Gustafsson time stepping control.
static const double rkBeta
#define EC_consecutive_steps
Maximum number of consecutive internal timesteps with error reached.
void dydt(const double t, const double mu, const double *__restrict__ y, double *__restrict__ dy)
An implementation of the RHS of the van der Pol equation.
void eval_jacob(const double t, const double pres, const double *cy, double *jac)
Computes a finite difference Jacobian of order FD_ORD of the RHS function dydt at the given pressure ...
#define Qmax
Max Timestep ratio to skip LU decomposition.
static const double rkELO
static const double rkTinv[3][3]
static double RK_ErrorEstimate(const double H, const double t, const double pr, const double *__restrict__ Y, const double *__restrict__ F0, const double *__restrict__ Z1, const double *__restrict__ Z2, const double *__restrict__ Z3, const double *__restrict__ scale, double *__restrict__ E1, int *__restrict__ ipiv1, const bool FirstStep, const bool Reject)
Computes and returns the error estimate for this step.
static const double rkT[3][3]
static const double rkTinvAinv[3][3]
static int NRHS
Lapack - 1 RHS solve.
#define ThetaMin
Minimum Newton convergence rate.
static void scale(const double *__restrict__ y0, const double *__restrict__ y, double *__restrict__ sc)
Computes error weight scaling from initial and current state.
void zgetrs_(const char *trans, const int *n, const int *nrhs, double complex *A, const int *LDA, int *ipiv, double complex *B, const int *LDB, int *info)
A file generated by Scons that specifies various options to the solvers.
static const double rkGamma
static const double rkC[3]
static void scale_init(const double *__restrict__ y0, double *__restrict__ sc)
Computes error weight scaling from initial state.
#define NewtonTol
Newton convergence tolerance.
static const double rkAinvT[3][3]
#define Roundoff
Smallist representable double precision number.
#define EC_success
Successful time step.
External lapack routine definitions.
static void RK_PrepareRHS(const double t, const double pr, const double H, const double *__restrict__ Y, const double *__restrict__ Z1, const double *__restrict__ Z2, const double *__restrict__ Z3, double *__restrict__ R1, double *__restrict__ R2, double *__restrict__ R3)
Prepare the right-hand side for Newton iterations: .
void dgetrf_(const int *m, const int *n, double *A, const int *lda, int *ipiv, int *info)
simple convenience file to include the correct solver properties file
#define Max_consecutive_errs
Error allowed on this many consecutive internal timesteps before exit.
static int ARRSIZE
Lapack - Array size.
#define StartNewton
Use quadratic interpolation from previous step if possible.
int integrate(const double t_start, const double t_end, const double pr, double *y)
5th-order Radau2A CPU implementation
#define NewtonMaxit
Maximum number of allowed Newton iteration steps before error.
static double RK_ErrorNorm(const double *__restrict__ scale, double *__restrict__ DY)
Computes the scaled error norm from the given scale and DY vectors.
#define FacMax
Controls maximum increase in timestep size.
static void WADD(const double *__restrict__ X, const double *__restrict__ Y, double *__restrict__ Z)
Performs with unrolled (or at least bounds known at compile time) loops.
static const double rkAlpha
void dgetrs_(const char *trans, const int *n, const int *nrhs, double *A, const int *LDA, int *ipiv, double *B, const int *LDB, int *info)
Contains a header definition for the van der Pol Jacobian evaluation.
static const double rkB[3]
#define EC_newton_max_iterations_exceeded
Maximum allowed Newton Iteration steps exceeded.
static void RK_Interpolate(const double H, const double Hold, double *__restrict__ Z1, double *__restrict__ Z2, double *__restrict__ Z3, const double *__restrict__ CONT)
Apply quadaratic interpolate to get initial values.
#define EC_max_steps_exceeded
Maximum number of internal timesteps exceeded.
#define FacMin
Controls maximum decrease in timestep size.
static void RK_Decomp(const double H, double *__restrict__ E1, double complex *__restrict__ E2, const double *__restrict__ Jac, int *__restrict__ ipiv1, int *__restrict__ ipiv2, int *__restrict__ info)
Compute E1 & E2 matricies and their LU Decomposition.
static const double rkTheta[3]
void zgetrf_(const int *m, const int *n, double complex *A, const int *lda, int *ipiv, int *info)
static void RK_Solve(const double H, double *__restrict__ E1, double complex *__restrict__ E2, double *__restrict__ R1, double *__restrict__ R2, double *__restrict__ R3, int *__restrict__ ipiv1, int *__restrict__ ipiv2)
Solves for the RHS values in the Newton iteration.
static char TRANS
Lapack - non-transpose.
#define Max_no_steps
Maximum number of allowed internal timesteps before error.
static const double rkE[4]
static const double rkA[3][3]
#define EC_h_plus_t_equals_h
Timescale reduced such that t + h == t in floating point math.
static void RK_Make_Interpolate(const double *__restrict__ Z1, const double *__restrict__ Z2, const double *__restrict__ Z3, double *__restrict__ CONT)
Compute Quadaratic interpolate.