accelerInt/exp4_8c_source.html

 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
 #include <stdbool.h>
 #include <string.h>

 #include "header.h"
 #include "dydt.h"
 #include "jacob.h"
 #include "arnoldi.h"
 #include "exp4_props.h"
 #include "exponential_linear_algebra.h"
 #include "solver_init.h"
 #include "sparse_multiplier.h"

 #ifdef GENERATE_DOCS
 namespace exp4 {
 #endif


 /**
  * \brief 4th-order exponential integrator (EXP4) with adaptive Krylov
  *        subspace approximation.
  *
  * Advances the state vector from t_start towards t_end using internal
  * sub-steps. Unless CONST_TIME_STEP is defined, the sub-step size is
  * adapted from two embedded error estimates.
  *
  * \param[in]     t_start  Time at the beginning of the integration interval.
  * \param[in]     t_end    Time at the end of the integration interval.
  * \param[in]     pr       Scalar parameter forwarded unchanged to dydt() and
  *                         eval_jacob() (presumably pressure / system
  *                         parameter -- confirm against those routines).
  * \param[in,out] y        State vector of NSP entries; overwritten with the
  *                         solution after every accepted sub-step.
  *
  * \return EC_success when the loop runs until t >= t_end;
  *         EC_consecutive_steps when MAX_CONSECUTIVE_ERRORS Krylov
  *         projections failed in a row;
  *         EC_max_steps_exceeded when more than MAX_STEPS loop iterations
  *         were needed;
  *         EC_h_plus_t_equals_h when the step size shrank so far that
  *         t + h no longer exceeds t in floating point.
  */
 int integrate (const double t_start, const double t_end, const double pr, double* y) {

     // initial step size
 #ifdef CONST_TIME_STEP
     double h = t_end - t_start;
 #else
     double h = fmin(1.0e-8, t_end - t_start);
 #endif
     double h_new;

     // history used by the Gustafsson step-size predictor
     double err_old = 1.0;
     double h_old = h;

     bool reject = false;
     int failures = 0;
     int steps = 0;

     // every exit path goes through the cleanup at the bottom of the function
     int status = EC_success;

     double t = t_start;

     // get scaling for weighted norm
     double sc[NSP];
     scale_init(y, sc);

 #ifdef LOG_KRYLOV_AND_STEPSIZES
     // log files for accepted / rejected steps, opened in append mode
     const char* f_name = solver_name();
     size_t len = strlen(f_name);

     char out_name[len + 17];
     snprintf(out_name, sizeof(out_name), "log/%s-kry-log.txt", f_name);
     // fopen may fail (e.g. missing log/ directory); logging is then skipped
     FILE *logFile = fopen(out_name, "a");

     char out_reject_name[len + 23];
     snprintf(out_reject_name, sizeof(out_reject_name), "log/%s-kry-reject.txt", f_name);
     FILE *rFile = fopen(out_reject_name, "a");
 #endif

     double beta = 0;
     // source vector
     double fy[NSP];
     // Jacobian matrix
     double A[NSP * NSP] = {0.0};

     // temporary arrays
     double temp[NSP];
     double f_temp[NSP];
     double y1[NSP];
     double Hm[STRIDE * STRIDE] = {0.0};
     double Vm[NSP * STRIDE];
     double phiHm[STRIDE * STRIDE];
     double err = 0.0;

     // i-vectors
     double k1[NSP];
     double k2[NSP];
     double k3[NSP];
     double k4[NSP];
     double k5[NSP];
     double k6[NSP];
     double k7[NSP];

     // The step-size underflow test lives inside the loop (not in the loop
     // guard) so that it is reported as an error instead of silently ending
     // the integration before t_end is reached.
     while (t < t_end) {

         // error checking
         if (failures >= MAX_CONSECUTIVE_ERRORS)
         {
             status = EC_consecutive_steps;
             break;
         }
         if (steps++ >= MAX_STEPS)
         {
             status = EC_max_steps_exceeded;
             break;
         }
         // written as a negation so that a NaN step size is caught as well
         if (!(t + h > t))
         {
             status = EC_h_plus_t_equals_h;
             break;
         }

         // y is unchanged after a rejected step, so the RHS and Jacobian
         // from the previous attempt are reused
         if (!reject) {
             dydt (t, pr, y, fy);
             eval_jacob (t, pr, y, A);
         }

         // do arnoldi
         int m = arnoldi(1.0 / 3.0, P, h, A, fy, sc, &beta, Vm, Hm, phiHm);
         if (m + P >= STRIDE || m < 0)
         {
             // need to reduce h and try again
             h /= 5.0;
             failures++;
             reject = true;
             continue;
         }

         // k1 is partially in the first column of phiHm
         // k1 = beta * Vm * phiHm(:, 1)
         matvec_n_by_m_scale(m, beta, Vm, phiHm, k1);

         // k2
         // computing phi(2h * A)
         matvec_m_by_m (m, phiHm, phiHm, temp);
         // note: f_temp will contain hm * phi * phi * e1 for later use
         matvec_m_by_m (m, Hm, temp, f_temp);
         matvec_n_by_m_scale_add(m, beta * (h / 6.0), Vm, f_temp, k2, k1);

         // k3
         // use the stored hm * phi * phi * e1 to get phi(3h * A)
         matvec_m_by_m (m, phiHm, f_temp, temp);
         matvec_m_by_m (m, Hm, temp, f_temp);
         matvec_n_by_m_scale_add_subtract(m, beta * (h * h / 27.0), Vm, f_temp, k3, k2, k1);

         // d4
         for (int i = 0; i < NSP; ++i) {
             // f4
             f_temp[i] = h * ((-7.0 / 300.0) * k1[i] + (97.0 / 150.0) * k2[i] - (37.0 / 300.0) * k3[i]);

             k4[i] = y[i] + f_temp[i];
         }

         // temp = RHS at the intermediate state, k4 = A * f_temp
         dydt (t, pr, k4, temp);
         sparse_multiplier (A, f_temp, k4);

         for (int i = 0; i < NSP; ++i) {
             k4[i] = temp[i] - fy[i] - k4[i];
         }

         // do arnoldi
         int m1 = arnoldi(1.0 / 3.0, P, h, A, k4, sc, &beta, Vm, Hm, phiHm);
         if (m1 + P >= STRIDE || m1 < 0)
         {
             // need to reduce h and try again
             h /= 5.0;
             failures++;
             reject = true;
             continue;
         }
         // k4 is taken from the leading column of phiHm
         matvec_n_by_m_scale(m1, beta, Vm, phiHm, k4);

         // k5
         // computing phi(2h * A)
         matvec_m_by_m (m1, phiHm, phiHm, temp);
         // note: f_temp will contain hm * phi * phi * e1 for later use
         matvec_m_by_m (m1, Hm, temp, f_temp);
         matvec_n_by_m_scale_add(m1, beta * (h / 6.0), Vm, f_temp, k5, k4);

         // k6
         // use the stored hm * phi * phi * e1 to get phi(3h * A)
         matvec_m_by_m (m1, phiHm, f_temp, temp);
         matvec_m_by_m (m1, Hm, temp, f_temp);
         matvec_n_by_m_scale_add_subtract(m1, beta * (h * h / 27.0), Vm, f_temp, k6, k5, k4);

         // k7
         for (int i = 0; i < NSP; ++i) {
             // f7
             f_temp[i] = h * ((59.0 / 300.0) * k1[i] - (7.0 / 75.0) * k2[i] + (269.0 / 300.0) * k3[i] + (2.0 / 3.0) * (k4[i] + k5[i] + k6[i]));

             k7[i] = y[i] + f_temp[i];
         }

         // temp = RHS at the intermediate state, k7 = A * f_temp
         dydt (t, pr, k7, temp);
         sparse_multiplier (A, f_temp, k7);

         for (int i = 0; i < NSP; ++i) {
             k7[i] = temp[i] - fy[i] - k7[i];
         }

         int m2 = arnoldi(1.0 / 3.0, P, h, A, k7, sc, &beta, Vm, Hm, phiHm);
         if (m2 + P >= STRIDE || m2 < 0)
         {
             // need to reduce h and try again
             h /= 5.0;
             failures++;
             reject = true;
             continue;
         }
         // k7 is partially in the m'th column of phiHm
         matvec_n_by_m_scale(m2, beta / (h / 3.0), Vm, &phiHm[m2 * STRIDE], k7);

         // y_n+1
         for (int i = 0; i < NSP; ++i) {
             y1[i] = y[i] + h * (k3[i] + k4[i] - (4.0 / 3.0) * k5[i] + k6[i] + (1.0 / 6.0) * k7[i]);
         }

 #ifndef CONST_TIME_STEP
         // scaling for the weighted norm, stored in f_temp
         scale (y, y1, f_temp);

         // calculate errors

         // error of embedded order 3 method
         for (int i = 0; i < NSP; ++i) {
             temp[i] = k3[i] - (2.0 / 3.0) * k5[i] + 0.5 * (k6[i] + k7[i] - k4[i]) - (y1[i] - y[i]) / h;
         }
         err = h * sc_norm(temp, f_temp);

         // error of embedded W method
         for (int i = 0; i < NSP; ++i) {
             temp[i] = -k1[i] + 2.0 * k2[i] - k4[i] + k7[i] - (y1[i] - y[i]) / h;
         }
         // use the smaller of the two estimates, bounded below by EPS
         err = fmax(EPS, fmin(err, h * sc_norm(temp, f_temp)));

         // classical step size calculation
         h_new = pow(err, -1.0 / ORD);

         // all three Krylov projections succeeded: reset the failure counter
         failures = 0;
         if (err <= 1.0) {

 #ifdef LOG_KRYLOV_AND_STEPSIZES
             if (logFile != NULL) {
                 fprintf (logFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t, h, err, m, m1, m2);
             }
 #endif

             // minimum of classical and Gustafsson step size prediction
             h_new = fmin(h_new, (h / h_old) * pow((err_old / (err * err)), (1.0 / ORD)));

             // clamp the step-size ratio (with 0.9 safety factor) to [0.2, 8.0]
             h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);

             // update y, t and sc
             memcpy(sc, f_temp, NSP * sizeof(double));
             memcpy(y, y1, NSP * sizeof(double));
             t += h;

             // store time step and error
             err_old = fmax(1.0e-2, err);
             h_old = h;

             // do not grow the step directly after a rejection
             if (reject) {
                 reject = false;
                 h_new = fmin(h, h_new);
             }
             // never step past t_end
             h = fmin(h_new, t_end - t);

         } else {

 #ifdef LOG_KRYLOV_AND_STEPSIZES
             if (rFile != NULL) {
                 fprintf (rFile, "%.15le\t%.15le\t%.15le\t%d\t%d\t%d\n", t, h, err, m, m1, m2);
             }
 #endif

             // clamp the step-size ratio (with 0.9 safety factor) to [0.2, 8.0]
             h_new = h * fmax(fmin(0.9 * h_new, 8.0), 0.2);
             h_new = fmin(h_new, t_end - t);

             // retry from the same state with a step no larger than before
             reject = true;
             h = fmin(h, h_new);
         }
 #else
         // constant time stepping
         // update y and t
         for (int i = 0; i < NSP; ++i) {
             y[i] = y1[i];
         }

         t += h;
 #endif

     } // end while

 #ifdef LOG_KRYLOV_AND_STEPSIZES
     // close the logs on every exit path, including the error exits
     if (logFile != NULL) {
         fclose(logFile);
     }
     if (rFile != NULL) {
         fclose(rFile);
     }
 #endif

     return status;

 }

 #ifdef GENERATE_DOCS
 }
 #endif
dydt.h
Contains header definitions for the RHS function for the van der Pol example.

exp4::integrate
int integrate(const double t_start, const double t_end, const double pr, double *y)
4th-order exponential integrator function w/ adaptive Krylov subspace approximation ...
Definition: exp4.c:46

exp4
Definition: exp4.c:32

header.h
An example header file that defines system size and other required methods for integration of the van...

scale_init
__device__ void scale_init(const double *__restrict__ y0, double *__restrict__ sc)
Get scaling for weighted norm for the initial timestep (used in krylov process)
Definition: exponential_linear_algebra.cu:166

P
#define P
max order of the phi functions (for error estimation)
Definition: exp4_props.cuh:22

EC_consecutive_steps
#define EC_consecutive_steps
Maximum number of consecutive internal timesteps with error reached.
Definition: radau2a_props.cuh:75

sparse_multiplier.h
Header definition for Jacobian vector multiplier, used in exponential integrators.

van_der_pol::dydt
void dydt(const double t, const double mu, const double *__restrict__ y, double *__restrict__ dy)
An implementation of the RHS of the van der Pol equation.
Definition: dydt.c:22

eval_jacob
void eval_jacob(const double t, const double pres, const double *cy, double *jac)
Computes a finite difference Jacobian of order FD_ORD of the RHS function dydt at the given pressure ...
Definition: fd_jacob.c:24

NSP
#define NSP
The IVP system size.
Definition: header.cuh:20

STRIDE
#define STRIDE
the matrix dimensions
Definition: radau2a_props.cuh:20

MAX_STEPS
#define MAX_STEPS
Maximum allowed internal timesteps per integration step.
Definition: exp4_props.cuh:30

exp4_props.h
Various macros controlling behaviour of EXP4 algorithm.

EC_success
#define EC_success
Successful time step.
Definition: radau2a_props.cuh:73

matvec_n_by_m_scale_add
__device__ void matvec_n_by_m_scale_add(const int m, const double scale, const double *__restrict__ A, const double *__restrict__ V, double *__restrict__ Av, const double *__restrict__ add)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
Definition: exponential_linear_algebra.cu:114

van_der_pol::sparse_multiplier
void sparse_multiplier(const double *A, const double *Vm, double *w)
Implements Jacobian \ vector multiplication in sparse (or unrolled) form.
Definition: sparse_multiplier.c:21

matvec_n_by_m_scale_add_subtract
__device__ void matvec_n_by_m_scale_add_subtract(const int m, const double scale, const double *__restrict__ A, const double *V, double *__restrict__ Av, const double *__restrict__ add, const double *__restrict__ sub)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
Definition: exponential_linear_algebra.cu:134

scale
__device__ void scale(const double *__restrict__ y0, const double *__restrict__ y1, double *__restrict__ sc)
Get scaling for weighted norm.
Definition: exponential_linear_algebra.cu:156

MAX_CONSECUTIVE_ERRORS
#define MAX_CONSECUTIVE_ERRORS
Number of consecutive errors on internal integration steps allowed before exit.
Definition: exp4_props.cuh:32

matvec_m_by_m
__device__ void matvec_m_by_m(const int m, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector multiplication of a matrix sized MxM and a vector Mx1.
Definition: exponential_linear_algebra.cu:15

arnoldi
__device__ int arnoldi(const double scale, const int p, const double h, const double *__restrict__ A, const solver_memory *__restrict__ solver, const double *__restrict__ v, double *__restrict__ beta, double *__restrict__ work, cuDoubleComplex *__restrict__ work2)
Runs the arnoldi iteration to calculate the Krylov projection.
Definition: arnoldi.cuh:51

jacob.h
Contains a header definition for the van der Pol Jacobian evaluation.

ORD
#define ORD
order of embedded methods
Definition: exp4_props.cuh:24

EC_max_steps_exceeded
#define EC_max_steps_exceeded
Maximum number of internal timesteps exceeded.
Definition: radau2a_props.cuh:77

sc_norm
__device__ double sc_norm(const double *__restrict__ nums, const double *__restrict__ sc)
Perform weighted norm.
Definition: exponential_linear_algebra.cu:176

exp4::solver_name
const char * solver_name()
Returns a descriptive solver name.
Definition: exp4_init.c:59

matvec_n_by_m_scale
__device__ void matvec_n_by_m_scale(const int m, const double scale, const double *const __restrict__ A, const double *const __restrict__ V, double *const __restrict__ Av)
Matrix-vector multiplication of a matrix sized NSPxM and a vector of size Mx1 scaled by a specified f...
Definition: exponential_linear_algebra.cu:48

solver_init.h
Header definitions for solver initialization routines.

arnoldi.h
Implementation of the arnoldi iteration methods.

EPS
#define EPS
Definition: solver_interface.cuh:24

exponential_linear_algebra.h
Implementation of various linear algebra functions needed in the exponential integrators.

EC_h_plus_t_equals_h
#define EC_h_plus_t_equals_h
Timescale reduced such that t + h == t in floating point math.
Definition: radau2a_props.cuh:79